From d38d0f30c4860dfae4091d4e064f9dcc804794b7 Mon Sep 17 00:00:00 2001
From: Qwerasd <qwerasd205@users.noreply.github.com>
Date: Wed, 9 Oct 2024 19:31:13 -0400
Subject: [PATCH] font/sprite: replace pixman with z2d, extend Box coverage

More complete coverage of the Symbols For Legacy Computing block,
including characters from Unicode 16.0.

Pixman and the web canvas implementation of Canvas have been removed in
favor of z2d for drawing. z2d has a nicer API with more powerful
methods, and because it is written in Zig with no platform-specific
optimizations it should compile to wasm without problems.
---
 build.zig                                     |   16 +-
 build.zig.zon                                 |    5 +-
 nix/devShell.nix                              |    3 -
 nix/package.nix                               |    2 -
 pkg/pixman/build.zig                          |  122 -
 pkg/pixman/build.zig.zon                      |   13 -
 pkg/pixman/c.zig                              |    3 -
 pkg/pixman/error.zig                          |    4 -
 pkg/pixman/format.zig                         |  118 -
 pkg/pixman/image.zig                          |  211 -
 pkg/pixman/main.zig                           |   23 -
 pkg/pixman/pixman-version.h                   |   54 -
 pkg/pixman/types.zig                          |  131 -
 src/font/sprite.zig                           |    1 +
 src/font/sprite/Box.zig                       | 1120 ++-
 src/font/sprite/Face.zig                      |   80 +-
 src/font/sprite/Powerline.zig                 |  158 +-
 src/font/sprite/canvas.zig                    |  513 +-
 src/font/sprite/testdata/Box.ppm              |  Bin 1048593 -> 1048593 bytes
 src/font/sprite/underline.zig                 |   26 +-
 vendor/pixman/.editorconfig                   |   14 -
 vendor/pixman/.gitignore                      |   56 -
 vendor/pixman/.gitlab-ci.yml                  |   19 -
 vendor/pixman/AUTHORS                         |    0
 vendor/pixman/CODING_STYLE                    |  199 -
 vendor/pixman/COPYING                         |   42 -
 vendor/pixman/ChangeLog                       |    0
 vendor/pixman/INSTALL                         |  234 -
 vendor/pixman/Makefile.am                     |  143 -
 vendor/pixman/Makefile.win32                  |   25 -
 vendor/pixman/Makefile.win32.common           |   73 -
 vendor/pixman/NEWS                            |    0
 vendor/pixman/README                          |  140 -
 vendor/pixman/RELEASING                       |   59 -
 vendor/pixman/a64-neon-test.S                 |    5 -
 vendor/pixman/arm-simd-test.S                 |   10 -
 vendor/pixman/autogen.sh                      |   14 -
 vendor/pixman/configure.ac                    | 1199 ---
 vendor/pixman/meson.build                     |  581 --
 vendor/pixman/meson_options.txt               |  128 -
 vendor/pixman/neon-test.S                     |   12 -
 vendor/pixman/pixman-1-uninstalled.pc.in      |    5 -
 vendor/pixman/pixman-1.pc.in                  |   11 -
 vendor/pixman/pixman/Makefile.am              |  158 -
 vendor/pixman/pixman/Makefile.sources         |   43 -
 vendor/pixman/pixman/Makefile.win32           |   93 -
 .../pixman/pixman/dither/blue-noise-64x64.h   |   77 -
 vendor/pixman/pixman/dither/make-blue-noise.c |  679 --
 vendor/pixman/pixman/loongson-mmintrin.h      |  412 --
 vendor/pixman/pixman/make-srgb.pl             |  115 -
 vendor/pixman/pixman/meson.build              |  143 -
 .../pixman/pixman/pixman-access-accessors.c   |    3 -
 vendor/pixman/pixman/pixman-access.c          | 1715 -----
 vendor/pixman/pixman/pixman-accessor.h        |   25 -
 vendor/pixman/pixman/pixman-arm-asm.h         |   37 -
 vendor/pixman/pixman/pixman-arm-common.h      |  419 --
 .../pixman/pixman/pixman-arm-detect-win32.asm |   21 -
 .../pixman/pixman-arm-neon-asm-bilinear.S     | 1358 ----
 vendor/pixman/pixman/pixman-arm-neon-asm.S    | 3627 ---------
 vendor/pixman/pixman/pixman-arm-neon-asm.h    | 1184 ---
 vendor/pixman/pixman/pixman-arm-neon.c        |  493 --
 .../pixman/pixman-arm-simd-asm-scaled.S       |  156 -
 vendor/pixman/pixman/pixman-arm-simd-asm.S    | 1179 ---
 vendor/pixman/pixman/pixman-arm-simd-asm.h    |  966 ---
 vendor/pixman/pixman/pixman-arm-simd.c        |  291 -
 vendor/pixman/pixman/pixman-arm.c             |  256 -
 .../pixman/pixman-arma64-neon-asm-bilinear.S  | 1275 ----
 vendor/pixman/pixman/pixman-arma64-neon-asm.S | 3704 ----------
 vendor/pixman/pixman/pixman-arma64-neon-asm.h | 1310 ----
 vendor/pixman/pixman/pixman-bits-image.c      | 1383 ----
 vendor/pixman/pixman/pixman-combine-float.c   | 1158 ---
 vendor/pixman/pixman/pixman-combine32.c       | 1189 ---
 vendor/pixman/pixman/pixman-combine32.h       |  272 -
 vendor/pixman/pixman/pixman-compiler.h        |  234 -
 .../pixman/pixman/pixman-conical-gradient.c   |  220 -
 vendor/pixman/pixman/pixman-edge-accessors.c  |    4 -
 vendor/pixman/pixman/pixman-edge-imp.h        |  182 -
 vendor/pixman/pixman/pixman-edge.c            |  385 -
 vendor/pixman/pixman/pixman-fast-path.c       | 3298 ---------
 vendor/pixman/pixman/pixman-filter.c          |  491 --
 vendor/pixman/pixman/pixman-general.c         |  264 -
 vendor/pixman/pixman/pixman-glyph.c           |  676 --
 vendor/pixman/pixman/pixman-gradient-walker.c |  264 -
 vendor/pixman/pixman/pixman-image.c           |  994 ---
 vendor/pixman/pixman/pixman-implementation.c  |  417 --
 vendor/pixman/pixman/pixman-inlines.h         | 1365 ----
 vendor/pixman/pixman/pixman-linear-gradient.c |  292 -
 vendor/pixman/pixman/pixman-matrix.c          | 1073 ---
 vendor/pixman/pixman/pixman-mips-dspr2-asm.S  | 4283 -----------
 vendor/pixman/pixman/pixman-mips-dspr2-asm.h  |  711 --
 vendor/pixman/pixman/pixman-mips-dspr2.c      |  459 --
 vendor/pixman/pixman/pixman-mips-dspr2.h      |  432 --
 vendor/pixman/pixman/pixman-mips-memcpy-asm.S |  382 -
 vendor/pixman/pixman/pixman-mips.c            |   94 -
 vendor/pixman/pixman/pixman-mmx.c             | 4153 -----------
 vendor/pixman/pixman/pixman-noop.c            |  161 -
 vendor/pixman/pixman/pixman-ppc.c             |  173 -
 vendor/pixman/pixman/pixman-private.h         | 1193 ---
 vendor/pixman/pixman/pixman-radial-gradient.c |  509 --
 vendor/pixman/pixman/pixman-region.c          | 2800 -------
 vendor/pixman/pixman/pixman-region16.c        |   67 -
 vendor/pixman/pixman/pixman-region32.c        |   47 -
 vendor/pixman/pixman/pixman-solid-fill.c      |   67 -
 vendor/pixman/pixman/pixman-sse2.c            | 6528 -----------------
 vendor/pixman/pixman/pixman-ssse3.c           |  351 -
 vendor/pixman/pixman/pixman-timer.c           |   66 -
 vendor/pixman/pixman/pixman-trap.c            |  711 --
 vendor/pixman/pixman/pixman-utils.c           |  330 -
 vendor/pixman/pixman/pixman-version.h.in      |   54 -
 vendor/pixman/pixman/pixman-vmx.c             | 3159 --------
 vendor/pixman/pixman/pixman-x86.c             |  249 -
 vendor/pixman/pixman/pixman.c                 | 1134 ---
 vendor/pixman/pixman/pixman.h                 | 1426 ----
 vendor/pixman/pixman/rounding.txt             |  168 -
 vendor/pixman/pixman/solaris-hwcap.mapfile    |   30 -
 115 files changed, 1029 insertions(+), 68180 deletions(-)
 delete mode 100644 pkg/pixman/build.zig
 delete mode 100644 pkg/pixman/build.zig.zon
 delete mode 100644 pkg/pixman/c.zig
 delete mode 100644 pkg/pixman/error.zig
 delete mode 100644 pkg/pixman/format.zig
 delete mode 100644 pkg/pixman/image.zig
 delete mode 100644 pkg/pixman/main.zig
 delete mode 100644 pkg/pixman/pixman-version.h
 delete mode 100644 pkg/pixman/types.zig
 delete mode 100644 vendor/pixman/.editorconfig
 delete mode 100644 vendor/pixman/.gitignore
 delete mode 100644 vendor/pixman/.gitlab-ci.yml
 delete mode 100644 vendor/pixman/AUTHORS
 delete mode 100644 vendor/pixman/CODING_STYLE
 delete mode 100644 vendor/pixman/COPYING
 delete mode 100644 vendor/pixman/ChangeLog
 delete mode 100644 vendor/pixman/INSTALL
 delete mode 100644 vendor/pixman/Makefile.am
 delete mode 100644 vendor/pixman/Makefile.win32
 delete mode 100644 vendor/pixman/Makefile.win32.common
 delete mode 100644 vendor/pixman/NEWS
 delete mode 100644 vendor/pixman/README
 delete mode 100644 vendor/pixman/RELEASING
 delete mode 100644 vendor/pixman/a64-neon-test.S
 delete mode 100644 vendor/pixman/arm-simd-test.S
 delete mode 100755 vendor/pixman/autogen.sh
 delete mode 100644 vendor/pixman/configure.ac
 delete mode 100644 vendor/pixman/meson.build
 delete mode 100644 vendor/pixman/meson_options.txt
 delete mode 100644 vendor/pixman/neon-test.S
 delete mode 100644 vendor/pixman/pixman-1-uninstalled.pc.in
 delete mode 100644 vendor/pixman/pixman-1.pc.in
 delete mode 100644 vendor/pixman/pixman/Makefile.am
 delete mode 100644 vendor/pixman/pixman/Makefile.sources
 delete mode 100644 vendor/pixman/pixman/Makefile.win32
 delete mode 100644 vendor/pixman/pixman/dither/blue-noise-64x64.h
 delete mode 100644 vendor/pixman/pixman/dither/make-blue-noise.c
 delete mode 100644 vendor/pixman/pixman/loongson-mmintrin.h
 delete mode 100644 vendor/pixman/pixman/make-srgb.pl
 delete mode 100644 vendor/pixman/pixman/meson.build
 delete mode 100644 vendor/pixman/pixman/pixman-access-accessors.c
 delete mode 100644 vendor/pixman/pixman/pixman-access.c
 delete mode 100644 vendor/pixman/pixman/pixman-accessor.h
 delete mode 100644 vendor/pixman/pixman/pixman-arm-asm.h
 delete mode 100644 vendor/pixman/pixman/pixman-arm-common.h
 delete mode 100644 vendor/pixman/pixman/pixman-arm-detect-win32.asm
 delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S
 delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm.S
 delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm.h
 delete mode 100644 vendor/pixman/pixman/pixman-arm-neon.c
 delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S
 delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm.S
 delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm.h
 delete mode 100644 vendor/pixman/pixman/pixman-arm-simd.c
 delete mode 100644 vendor/pixman/pixman/pixman-arm.c
 delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S
 delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm.S
 delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm.h
 delete mode 100644 vendor/pixman/pixman/pixman-bits-image.c
 delete mode 100644 vendor/pixman/pixman/pixman-combine-float.c
 delete mode 100644 vendor/pixman/pixman/pixman-combine32.c
 delete mode 100644 vendor/pixman/pixman/pixman-combine32.h
 delete mode 100644 vendor/pixman/pixman/pixman-compiler.h
 delete mode 100644 vendor/pixman/pixman/pixman-conical-gradient.c
 delete mode 100644 vendor/pixman/pixman/pixman-edge-accessors.c
 delete mode 100644 vendor/pixman/pixman/pixman-edge-imp.h
 delete mode 100644 vendor/pixman/pixman/pixman-edge.c
 delete mode 100644 vendor/pixman/pixman/pixman-fast-path.c
 delete mode 100644 vendor/pixman/pixman/pixman-filter.c
 delete mode 100644 vendor/pixman/pixman/pixman-general.c
 delete mode 100644 vendor/pixman/pixman/pixman-glyph.c
 delete mode 100644 vendor/pixman/pixman/pixman-gradient-walker.c
 delete mode 100644 vendor/pixman/pixman/pixman-image.c
 delete mode 100644 vendor/pixman/pixman/pixman-implementation.c
 delete mode 100644 vendor/pixman/pixman/pixman-inlines.h
 delete mode 100644 vendor/pixman/pixman/pixman-linear-gradient.c
 delete mode 100644 vendor/pixman/pixman/pixman-matrix.c
 delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2-asm.S
 delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2-asm.h
 delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2.c
 delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2.h
 delete mode 100644 vendor/pixman/pixman/pixman-mips-memcpy-asm.S
 delete mode 100644 vendor/pixman/pixman/pixman-mips.c
 delete mode 100644 vendor/pixman/pixman/pixman-mmx.c
 delete mode 100644 vendor/pixman/pixman/pixman-noop.c
 delete mode 100644 vendor/pixman/pixman/pixman-ppc.c
 delete mode 100644 vendor/pixman/pixman/pixman-private.h
 delete mode 100644 vendor/pixman/pixman/pixman-radial-gradient.c
 delete mode 100644 vendor/pixman/pixman/pixman-region.c
 delete mode 100644 vendor/pixman/pixman/pixman-region16.c
 delete mode 100644 vendor/pixman/pixman/pixman-region32.c
 delete mode 100644 vendor/pixman/pixman/pixman-solid-fill.c
 delete mode 100644 vendor/pixman/pixman/pixman-sse2.c
 delete mode 100644 vendor/pixman/pixman/pixman-ssse3.c
 delete mode 100644 vendor/pixman/pixman/pixman-timer.c
 delete mode 100644 vendor/pixman/pixman/pixman-trap.c
 delete mode 100644 vendor/pixman/pixman/pixman-utils.c
 delete mode 100644 vendor/pixman/pixman/pixman-version.h.in
 delete mode 100644 vendor/pixman/pixman/pixman-vmx.c
 delete mode 100644 vendor/pixman/pixman/pixman-x86.c
 delete mode 100644 vendor/pixman/pixman/pixman.c
 delete mode 100644 vendor/pixman/pixman/pixman.h
 delete mode 100644 vendor/pixman/pixman/rounding.txt
 delete mode 100644 vendor/pixman/pixman/solaris-hwcap.mapfile

diff --git a/build.zig b/build.zig
index 0c9cc9441..25b5e5453 100644
--- a/build.zig
+++ b/build.zig
@@ -1007,10 +1007,6 @@ fn addDeps(
         .optimize = optimize,
     });
     const opengl_dep = b.dependency("opengl", .{});
-    const pixman_dep = b.dependency("pixman", .{
-        .target = target,
-        .optimize = optimize,
-    });
     const sentry_dep = b.dependency("sentry", .{
         .target = target,
         .optimize = optimize,
@@ -1044,6 +1040,7 @@ fn addDeps(
         .target = target,
         .optimize = optimize,
     });
+    const z2d_dep = b.dependency("z2d", .{});
 
     // Wasm we do manually since it is such a different build.
     if (step.rootModuleTarget().cpu.arch == .wasm32) {
@@ -1125,12 +1122,16 @@ fn addDeps(
     step.root_module.addImport("spirv_cross", spirv_cross_dep.module("spirv_cross"));
     step.root_module.addImport("xev", libxev_dep.module("xev"));
     step.root_module.addImport("opengl", opengl_dep.module("opengl"));
-    step.root_module.addImport("pixman", pixman_dep.module("pixman"));
     step.root_module.addImport("sentry", sentry_dep.module("sentry"));
     step.root_module.addImport("ziglyph", ziglyph_dep.module("ziglyph"));
     step.root_module.addImport("vaxis", vaxis_dep.module("vaxis"));
     step.root_module.addImport("wuffs", wuffs_dep.module("wuffs"));
     step.root_module.addImport("zf", zf_dep.module("zf"));
+    step.root_module.addImport("z2d", b.addModule("z2d", .{
+        .root_source_file = z2d_dep.path("src/z2d.zig"),
+        .target = target,
+        .optimize = optimize,
+    }));
 
     // Mac Stuff
     if (step.rootModuleTarget().isDarwin()) {
@@ -1196,7 +1197,6 @@ fn addDeps(
         step.linkSystemLibrary2("freetype2", dynamic_link_opts);
         step.linkSystemLibrary2("libpng", dynamic_link_opts);
         step.linkSystemLibrary2("oniguruma", dynamic_link_opts);
-        step.linkSystemLibrary2("pixman-1", dynamic_link_opts);
         step.linkSystemLibrary2("zlib", dynamic_link_opts);
 
         if (config.font_backend.hasFontconfig()) {
@@ -1222,10 +1222,6 @@ fn addDeps(
         step.linkLibrary(freetype_dep.artifact("freetype"));
         try static_libs.append(freetype_dep.artifact("freetype").getEmittedBin());
 
-        // Pixman
-        step.linkLibrary(pixman_dep.artifact("pixman"));
-        try static_libs.append(pixman_dep.artifact("pixman").getEmittedBin());
-
         // Harfbuzz
         if (config.font_backend.hasHarfbuzz()) {
             step.linkLibrary(harfbuzz_dep.artifact("harfbuzz"));
diff --git a/build.zig.zon b/build.zig.zon
index 3722bacc2..b0c409778 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -36,7 +36,6 @@
         .macos = .{ .path = "./pkg/macos" },
         .oniguruma = .{ .path = "./pkg/oniguruma" },
         .opengl = .{ .path = "./pkg/opengl" },
-        .pixman = .{ .path = "./pkg/pixman" },
         .sentry = .{ .path = "./pkg/sentry" },
         .simdutf = .{ .path = "./pkg/simdutf" },
         .utfcpp = .{ .path = "./pkg/utfcpp" },
@@ -61,5 +60,9 @@
             .url = "git+https://github.com/natecraddock/zf.git?ref=main#bb27a917c3513785c6a91f0b1c10002a5029cacc",
             .hash = "1220a74107c7f153a2f809e41c7fa7e8dbf75c91043e39fad998247804e5edac2cc8",
         },
+        .z2d = .{
+            .url = "git+https://github.com/vancluever/z2d?ref=main#285a796eb9c25a2389f087d008f0e60faf0b8eda",
+            .hash = "12206445aa45bcf0170ace371905f705aec1d8d4f61e7dd77839c6621b8c407680a5",
+        },
     },
 }
diff --git a/nix/devShell.nix b/nix/devShell.nix
index ed83407fb..a924d853e 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -41,7 +41,6 @@
   libXi,
   libXinerama,
   libXrandr,
-  pixman,
   zlib,
   alejandra,
   pandoc,
@@ -61,7 +60,6 @@
       harfbuzz
       libpng
       oniguruma
-      pixman
       zlib
 
       libX11
@@ -126,7 +124,6 @@ in
         harfbuzz
         libpng
         oniguruma
-        pixman
         zlib
 
         libX11
diff --git a/nix/package.nix b/nix/package.nix
index af7587900..668663dc7 100644
--- a/nix/package.nix
+++ b/nix/package.nix
@@ -7,7 +7,6 @@
   freetype,
   harfbuzz,
   libpng,
-  pixman,
   zlib,
   libGL,
   libX11,
@@ -133,7 +132,6 @@ in
         freetype
         harfbuzz
         libpng
-        pixman
         zlib
 
         libX11
diff --git a/pkg/pixman/build.zig b/pkg/pixman/build.zig
deleted file mode 100644
index b1338ed25..000000000
--- a/pkg/pixman/build.zig
+++ /dev/null
@@ -1,122 +0,0 @@
-const std = @import("std");
-
-pub fn build(b: *std.Build) !void {
-    const target = b.standardTargetOptions(.{});
-    const optimize = b.standardOptimizeOption(.{});
-
-    const module = b.addModule("pixman", .{ .root_source_file = b.path("main.zig") });
-
-    const upstream = b.dependency("pixman", .{});
-    const lib = b.addStaticLibrary(.{
-        .name = "pixman",
-        .target = target,
-        .optimize = optimize,
-    });
-    lib.linkLibC();
-    if (target.result.os.tag != .windows) {
-        lib.linkSystemLibrary("pthread");
-    }
-    if (target.result.isDarwin()) {
-        const apple_sdk = @import("apple_sdk");
-        try apple_sdk.addPaths(b, &lib.root_module);
-    }
-
-    lib.addIncludePath(upstream.path(""));
-    lib.addIncludePath(b.path(""));
-    module.addIncludePath(upstream.path("pixman"));
-    module.addIncludePath(b.path(""));
-
-    var flags = std.ArrayList([]const u8).init(b.allocator);
-    defer flags.deinit();
-    try flags.appendSlice(&.{
-        "-DHAVE_SIGACTION=1",
-        "-DHAVE_ALARM=1",
-        "-DHAVE_MPROTECT=1",
-        "-DHAVE_GETPAGESIZE=1",
-        "-DHAVE_MMAP=1",
-        "-DHAVE_GETISAX=1",
-        "-DHAVE_GETTIMEOFDAY=1",
-
-        "-DHAVE_FENV_H=1",
-        "-DHAVE_SYS_MMAN_H=1",
-        "-DHAVE_UNISTD_H=1",
-
-        "-DSIZEOF_LONG=8",
-        "-DPACKAGE=foo",
-
-        // There is ubsan
-        "-fno-sanitize=undefined",
-        "-fno-sanitize-trap=undefined",
-    });
-    if (!(target.result.os.tag == .windows)) {
-        try flags.appendSlice(&.{
-            "-DHAVE_PTHREADS=1",
-
-            "-DHAVE_POSIX_MEMALIGN=1",
-        });
-    }
-
-    lib.addCSourceFiles(.{
-        .root = upstream.path(""),
-        .files = srcs,
-        .flags = flags.items,
-    });
-
-    lib.installHeader(b.path("pixman-version.h"), "pixman-version.h");
-    lib.installHeadersDirectory(
-        upstream.path("pixman"),
-        "",
-        .{ .include_extensions = &.{".h"} },
-    );
-
-    b.installArtifact(lib);
-
-    if (target.query.isNative()) {
-        const test_exe = b.addTest(.{
-            .name = "test",
-            .root_source_file = b.path("main.zig"),
-            .target = target,
-            .optimize = optimize,
-        });
-        test_exe.linkLibrary(lib);
-        var it = module.import_table.iterator();
-        while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*);
-
-        const tests_run = b.addRunArtifact(test_exe);
-        const test_step = b.step("test", "Run tests");
-        test_step.dependOn(&tests_run.step);
-    }
-}
-
-const srcs: []const []const u8 = &.{
-    "pixman/pixman.c",
-    "pixman/pixman-access.c",
-    "pixman/pixman-access-accessors.c",
-    "pixman/pixman-bits-image.c",
-    "pixman/pixman-combine32.c",
-    "pixman/pixman-combine-float.c",
-    "pixman/pixman-conical-gradient.c",
-    "pixman/pixman-filter.c",
-    "pixman/pixman-x86.c",
-    "pixman/pixman-mips.c",
-    "pixman/pixman-arm.c",
-    "pixman/pixman-ppc.c",
-    "pixman/pixman-edge.c",
-    "pixman/pixman-edge-accessors.c",
-    "pixman/pixman-fast-path.c",
-    "pixman/pixman-glyph.c",
-    "pixman/pixman-general.c",
-    "pixman/pixman-gradient-walker.c",
-    "pixman/pixman-image.c",
-    "pixman/pixman-implementation.c",
-    "pixman/pixman-linear-gradient.c",
-    "pixman/pixman-matrix.c",
-    "pixman/pixman-noop.c",
-    "pixman/pixman-radial-gradient.c",
-    "pixman/pixman-region16.c",
-    "pixman/pixman-region32.c",
-    "pixman/pixman-solid-fill.c",
-    //"pixman/pixman-timer.c",
-    "pixman/pixman-trap.c",
-    "pixman/pixman-utils.c",
-};
diff --git a/pkg/pixman/build.zig.zon b/pkg/pixman/build.zig.zon
deleted file mode 100644
index af6813e07..000000000
--- a/pkg/pixman/build.zig.zon
+++ /dev/null
@@ -1,13 +0,0 @@
-.{
-    .name = "pixman",
-    .version = "0.42.2",
-    .paths = .{""},
-    .dependencies = .{
-        .pixman = .{
-            .url = "https://deps.files.ghostty.dev/pixman-pixman-0.42.2.tar.gz",
-            .hash = "12209b9206f9a5d31ccd9a2312cc72cb9dfc3e034aee1883c549dc1d753fae457230",
-        },
-
-        .apple_sdk = .{ .path = "../apple-sdk" },
-    },
-}
diff --git a/pkg/pixman/c.zig b/pkg/pixman/c.zig
deleted file mode 100644
index 912dd7fbc..000000000
--- a/pkg/pixman/c.zig
+++ /dev/null
@@ -1,3 +0,0 @@
-pub const c = @cImport({
-    @cInclude("pixman.h");
-});
diff --git a/pkg/pixman/error.zig b/pkg/pixman/error.zig
deleted file mode 100644
index 2fa569aed..000000000
--- a/pkg/pixman/error.zig
+++ /dev/null
@@ -1,4 +0,0 @@
-pub const Error = error{
-    // Pixman doesn't really have errors so we just have a single error.
-    PixmanFailure,
-};
diff --git a/pkg/pixman/format.zig b/pkg/pixman/format.zig
deleted file mode 100644
index 1176d5212..000000000
--- a/pkg/pixman/format.zig
+++ /dev/null
@@ -1,118 +0,0 @@
-const std = @import("std");
-const c = @import("c.zig").c;
-const pixman = @import("main.zig");
-
-pub const FormatCode = enum(c_uint) {
-    // 128bpp formats
-    rgba_float = c.PIXMAN_FORMAT_BYTE(128, c.PIXMAN_TYPE_RGBA_FLOAT, 32, 32, 32, 32),
-
-    // 96bpp formats
-    rgb_float = c.PIXMAN_FORMAT_BYTE(96, c.PIXMAN_TYPE_RGBA_FLOAT, 0, 32, 32, 32),
-
-    // 32bpp formats
-    a8r8g8b8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 8, 8, 8, 8),
-    x8r8g8b8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 8, 8, 8),
-    a8b8g8r8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 8, 8, 8, 8),
-    x8b8g8r8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 0, 8, 8, 8),
-    b8g8r8a8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_BGRA, 8, 8, 8, 8),
-    b8g8r8x8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_BGRA, 0, 8, 8, 8),
-    r8g8b8a8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_RGBA, 8, 8, 8, 8),
-    r8g8b8x8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_RGBA, 0, 8, 8, 8),
-    x14r6g6b6 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 6, 6, 6),
-    x2r10g10b10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 10, 10, 10),
-    a2r10g10b10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 2, 10, 10, 10),
-    x2b10g10r10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 0, 10, 10, 10),
-    a2b10g10r10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 2, 10, 10, 10),
-
-    // sRGB formats
-    a8r8g8b8_sRGB = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB_SRGB, 8, 8, 8, 8),
-    r8g8b8_sRGB = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ARGB_SRGB, 0, 8, 8, 8),
-
-    // 24bpp formats
-    r8g8b8 = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ARGB, 0, 8, 8, 8),
-    b8g8r8 = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ABGR, 0, 8, 8, 8),
-
-    // 16bpp formats
-    r5g6b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 5, 6, 5),
-    b5g6r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 5, 6, 5),
-
-    a1r5g5b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 1, 5, 5, 5),
-    x1r5g5b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 5, 5, 5),
-    a1b5g5r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 1, 5, 5, 5),
-    x1b5g5r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 5, 5, 5),
-    a4r4g4b4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 4, 4, 4, 4),
-    x4r4g4b4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 4, 4, 4),
-    a4b4g4r4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 4, 4, 4, 4),
-    x4b4g4r4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 4, 4, 4),
-
-    // 8bpp formats
-    a8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_A, 8, 0, 0, 0),
-    r3g3b2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ARGB, 0, 3, 3, 2),
-    b2g3r3 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ABGR, 0, 3, 3, 2),
-    a2r2g2b2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ARGB, 2, 2, 2, 2),
-    a2b2g2r2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ABGR, 2, 2, 2, 2),
-
-    c8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0),
-    g8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0),
-
-    x4a4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_A, 4, 0, 0, 0),
-
-    // c8/g8 equivalent
-    // x4c4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0),
-    // x4g4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0),
-
-    // 4bpp formats
-    a4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_A, 4, 0, 0, 0),
-    r1g2b1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ARGB, 0, 1, 2, 1),
-    b1g2r1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ABGR, 0, 1, 2, 1),
-    a1r1g1b1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ARGB, 1, 1, 1, 1),
-    a1b1g1r1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ABGR, 1, 1, 1, 1),
-
-    c4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0),
-    g4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0),
-
-    // 1bpp formats
-    a1 = c.PIXMAN_FORMAT(1, c.PIXMAN_TYPE_A, 1, 0, 0, 0),
-
-    g1 = c.PIXMAN_FORMAT(1, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0),
-
-    // YUV formats
-    yuy2 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_YUY2, 0, 0, 0, 0),
-    yv12 = c.PIXMAN_FORMAT(12, c.PIXMAN_TYPE_YV12, 0, 0, 0, 0),
-
-    pub inline fn bpp(self: FormatCode) u32 {
-        return self.reshift(24, 8);
-    }
-
-    /// Calculates a valid stride for the bpp and width. Based on Cairo.
-    pub fn strideForWidth(self: FormatCode, width: u32) c_int {
-        const alignment = @sizeOf(u32);
-        const val = @as(c_int, @intCast((self.bpp() * width + 7) / 8));
-        return val + (alignment - 1) & -alignment;
-    }
-
-    // Converted from pixman.h
-    fn reshift(self: FormatCode, ofs: u5, num: u5) u32 {
-        const val = @intFromEnum(self);
-        const v1 = val >> ofs;
-        const v2 = @as(c_uint, 1) << num;
-        const v3 = @as(u5, @intCast((val >> 22) & 3));
-        return ((v1 & (v2 - 1)) << v3);
-    }
-};
-
-test "bpp" {
-    const testing = std.testing;
-
-    try testing.expectEqual(@as(u32, 1), FormatCode.g1.bpp());
-    try testing.expectEqual(@as(u32, 4), FormatCode.g4.bpp());
-    try testing.expectEqual(@as(u32, 8), FormatCode.g8.bpp());
-}
-
-test "stride" {
-    const testing = std.testing;
-
-    try testing.expectEqual(@as(c_int, 4), FormatCode.g1.strideForWidth(10));
-    try testing.expectEqual(@as(c_int, 8), FormatCode.g4.strideForWidth(10));
-    try testing.expectEqual(@as(c_int, 12), FormatCode.g8.strideForWidth(10));
-}
diff --git a/pkg/pixman/image.zig b/pkg/pixman/image.zig
deleted file mode 100644
index 76cb45922..000000000
--- a/pkg/pixman/image.zig
+++ /dev/null
@@ -1,211 +0,0 @@
-const std = @import("std");
-const c = @import("c.zig").c;
-const pixman = @import("main.zig");
-
-pub const Image = opaque {
-    pub fn createBitsNoClear(
-        format: pixman.FormatCode,
-        width: c_int,
-        height: c_int,
-        bits: [*]u32,
-        stride: c_int,
-    ) pixman.Error!*Image {
-        return @as(?*Image, @ptrCast(c.pixman_image_create_bits_no_clear(
-            @intFromEnum(format),
-            width,
-            height,
-            bits,
-            stride,
-        ))) orelse return pixman.Error.PixmanFailure;
-    }
-
-    pub fn createSolidFill(
-        color: pixman.Color,
-    ) pixman.Error!*Image {
-        return @as(?*Image, @ptrCast(c.pixman_image_create_solid_fill(
-            @ptrCast(&color),
-        ))) orelse return pixman.Error.PixmanFailure;
-    }
-
-    pub fn unref(self: *Image) bool {
-        return c.pixman_image_unref(@ptrCast(self)) == 1;
-    }
-
-    /// A variant of getDataUnsafe that sets the length of the slice to
-    /// height * stride. Its possible the buffer is larger but this is the
-    /// known safe values. If you KNOW the buffer is larger you can use the
-    /// unsafe variant.
-    pub fn getData(self: *Image) []u32 {
-        const height = self.getHeight();
-        const stride = self.getStride();
-        const ptr = self.getDataUnsafe();
-        const len = @as(usize, @intCast(height * stride));
-        return ptr[0..len];
-    }
-
-    pub fn getDataUnsafe(self: *Image) [*]u32 {
-        return c.pixman_image_get_data(@ptrCast(self));
-    }
-
-    pub fn getHeight(self: *Image) c_int {
-        return c.pixman_image_get_height(@ptrCast(self));
-    }
-
-    pub fn getWidth(self: *Image) c_int {
-        return c.pixman_image_get_width(@ptrCast(self));
-    }
-
-    pub fn getStride(self: *Image) c_int {
-        return c.pixman_image_get_stride(@ptrCast(self));
-    }
-
-    pub fn fillBoxes(
-        self: *Image,
-        op: pixman.Op,
-        color: pixman.Color,
-        boxes: []const pixman.Box32,
-    ) pixman.Error!void {
-        if (c.pixman_image_fill_boxes(
-            @intFromEnum(op),
-            @ptrCast(self),
-            @ptrCast(&color),
-            @intCast(boxes.len),
-            @ptrCast(boxes.ptr),
-        ) == 0) return pixman.Error.PixmanFailure;
-    }
-
-    pub fn fillRectangles(
-        self: *Image,
-        op: pixman.Op,
-        color: pixman.Color,
-        rects: []const pixman.Rectangle16,
-    ) pixman.Error!void {
-        if (c.pixman_image_fill_rectangles(
-            @intFromEnum(op),
-            @ptrCast(self),
-            @ptrCast(&color),
-            @intCast(rects.len),
-            @ptrCast(rects.ptr),
-        ) == 0) return pixman.Error.PixmanFailure;
-    }
-
-    pub fn rasterizeTrapezoid(
-        self: *Image,
-        trap: pixman.Trapezoid,
-        x_off: c_int,
-        y_off: c_int,
-    ) void {
-        c.pixman_rasterize_trapezoid(
-            @ptrCast(self),
-            @ptrCast(&trap),
-            x_off,
-            y_off,
-        );
-    }
-
-    pub fn composite(
-        self: *Image,
-        op: pixman.Op,
-        src: *Image,
-        mask: ?*Image,
-        src_x: i16,
-        src_y: i16,
-        mask_x: i16,
-        mask_y: i16,
-        dest_x: i16,
-        dest_y: i16,
-        width: u16,
-        height: u16,
-    ) void {
-        c.pixman_image_composite(
-            @intFromEnum(op),
-            @ptrCast(src),
-            @ptrCast(mask),
-            @ptrCast(self),
-            src_x,
-            src_y,
-            mask_x,
-            mask_y,
-            dest_x,
-            dest_y,
-            width,
-            height,
-        );
-    }
-
-    pub fn compositeTriangles(
-        self: *Image,
-        op: pixman.Op,
-        src: *Image,
-        mask_format: pixman.FormatCode,
-        x_src: c_int,
-        y_src: c_int,
-        x_dst: c_int,
-        y_dst: c_int,
-        tris: []const pixman.Triangle,
-    ) void {
-        c.pixman_composite_triangles(
-            @intFromEnum(op),
-            @ptrCast(src),
-            @ptrCast(self),
-            @intFromEnum(mask_format),
-            x_src,
-            y_src,
-            x_dst,
-            y_dst,
-            @intCast(tris.len),
-            @ptrCast(tris.ptr),
-        );
-    }
-};
-
-test "create and destroy" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    const width = 10;
-    const height = 10;
-    const format: pixman.FormatCode = .g1;
-    const stride = format.strideForWidth(width);
-
-    const len = height * @as(usize, @intCast(stride));
-    const data = try alloc.alloc(u32, len);
-    defer alloc.free(data);
-    @memset(data, 0);
-    const img = try Image.createBitsNoClear(.g1, width, height, data.ptr, stride);
-    try testing.expectEqual(@as(c_int, height), img.getHeight());
-    try testing.expectEqual(@as(c_int, stride), img.getStride());
-    try testing.expect(img.getData().len == height * stride);
-    try testing.expect(img.unref());
-}
-
-test "fill boxes a1" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // Dimensions
-    const width = 100;
-    const height = 100;
-    const format: pixman.FormatCode = .a1;
-    const stride = format.strideForWidth(width);
-
-    // Image
-    const len = height * @as(usize, @intCast(stride));
-    const data = try alloc.alloc(u32, len);
-    defer alloc.free(data);
-    @memset(data, 0);
-    const img = try Image.createBitsNoClear(format, width, height, data.ptr, stride);
-    defer _ = img.unref();
-
-    // Fill
-    const color: pixman.Color = .{ .red = 0xFFFF, .green = 0xFFFF, .blue = 0xFFFF, .alpha = 0xFFFF };
-    const boxes = &[_]pixman.Box32{
-        .{
-            .x1 = 0,
-            .y1 = 0,
-            .x2 = width,
-            .y2 = height,
-        },
-    };
-    try img.fillBoxes(.src, color, boxes);
-}
diff --git a/pkg/pixman/main.zig b/pkg/pixman/main.zig
deleted file mode 100644
index 33ea19127..000000000
--- a/pkg/pixman/main.zig
+++ /dev/null
@@ -1,23 +0,0 @@
-const std = @import("std");
-const format = @import("format.zig");
-const image = @import("image.zig");
-const types = @import("types.zig");
-
-pub const c = @import("c.zig").c;
-pub const Color = types.Color;
-pub const Error = @import("error.zig").Error;
-pub const Fixed = types.Fixed;
-pub const FormatCode = format.FormatCode;
-pub const Image = image.Image;
-pub const Op = types.Op;
-pub const PointFixed = types.PointFixed;
-pub const LineFixed = types.LineFixed;
-pub const Triangle = types.Triangle;
-pub const Trapezoid = types.Trapezoid;
-pub const Rectangle16 = types.Rectangle16;
-pub const Box32 = types.Box32;
-pub const Indexed = types.Indexed;
-
-test {
-    std.testing.refAllDecls(@This());
-}
diff --git a/pkg/pixman/pixman-version.h b/pkg/pixman/pixman-version.h
deleted file mode 100644
index c2342d3d5..000000000
--- a/pkg/pixman/pixman-version.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright © 2008 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Author: Carl D. Worth <cworth@cworth.org>
- */
-
-#ifndef PIXMAN_VERSION_H__
-#define PIXMAN_VERSION_H__
-
-#ifndef PIXMAN_H__
-#  error pixman-version.h should only be included by pixman.h
-#endif
-
-#define PIXMAN_VERSION_MAJOR 999
-#define PIXMAN_VERSION_MINOR 999
-#define PIXMAN_VERSION_MICRO 999
-
-#define PIXMAN_VERSION_STRING "999.999.999"
-
-#define PIXMAN_VERSION_ENCODE(major, minor, micro) (	\
-	  ((major) * 10000)				\
-	+ ((minor) *   100)				\
-	+ ((micro) *     1))
-
-#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE(	\
-	PIXMAN_VERSION_MAJOR,			\
-	PIXMAN_VERSION_MINOR,			\
-	PIXMAN_VERSION_MICRO)
-
-#ifndef PIXMAN_API
-# define PIXMAN_API
-#endif
-
-#endif /* PIXMAN_VERSION_H__ */
diff --git a/pkg/pixman/types.zig b/pkg/pixman/types.zig
deleted file mode 100644
index 3547bd1d5..000000000
--- a/pkg/pixman/types.zig
+++ /dev/null
@@ -1,131 +0,0 @@
-const std = @import("std");
-const c = @import("c.zig").c;
-const pixman = @import("main.zig");
-
-pub const Op = enum(c_uint) {
-    clear = 0x00,
-    src = 0x01,
-    dst = 0x02,
-    over = 0x03,
-    over_reverse = 0x04,
-    in = 0x05,
-    in_reverse = 0x06,
-    out = 0x07,
-    out_reverse = 0x08,
-    atop = 0x09,
-    atop_reverse = 0x0a,
-    xor = 0x0b,
-    add = 0x0c,
-    saturate = 0x0d,
-
-    disjoint_clear = 0x10,
-    disjoint_src = 0x11,
-    disjoint_dst = 0x12,
-    disjoint_over = 0x13,
-    disjoint_over_reverse = 0x14,
-    disjoint_in = 0x15,
-    disjoint_in_reverse = 0x16,
-    disjoint_out = 0x17,
-    disjoint_out_reverse = 0x18,
-    disjoint_atop = 0x19,
-    disjoint_atop_reverse = 0x1a,
-    disjoint_xor = 0x1b,
-
-    conjoint_clear = 0x20,
-    conjoint_src = 0x21,
-    conjoint_dst = 0x22,
-    conjoint_over = 0x23,
-    conjoint_over_reverse = 0x24,
-    conjoint_in = 0x25,
-    conjoint_in_reverse = 0x26,
-    conjoint_out = 0x27,
-    conjoint_out_reverse = 0x28,
-    conjoint_atop = 0x29,
-    conjoint_atop_reverse = 0x2a,
-    conjoint_xor = 0x2b,
-
-    multiply = 0x30,
-    screen = 0x31,
-    overlay = 0x32,
-    darken = 0x33,
-    lighten = 0x34,
-    color_dodge = 0x35,
-    color_burn = 0x36,
-    hard_light = 0x37,
-    soft_light = 0x38,
-    difference = 0x39,
-    exclusion = 0x3a,
-    hsl_hue = 0x3b,
-    hsl_saturation = 0x3c,
-    hsl_color = 0x3d,
-    hsl_luminosity = 0x3e,
-};
-
-pub const Color = extern struct {
-    red: u16,
-    green: u16,
-    blue: u16,
-    alpha: u16,
-};
-
-pub const Fixed = enum(i32) {
-    _,
-
-    pub fn init(v: anytype) Fixed {
-        return switch (@TypeOf(v)) {
-            comptime_int, i32, u32 => @enumFromInt(v << 16),
-            f64 => @enumFromInt(@as(i32, @intFromFloat(v * 65536))),
-            else => {
-                @compileLog(@TypeOf(v));
-                @compileError("unsupported type");
-            },
-        };
-    }
-};
-
-pub const PointFixed = extern struct {
-    x: Fixed,
-    y: Fixed,
-};
-
-pub const LineFixed = extern struct {
-    p1: PointFixed,
-    p2: PointFixed,
-};
-
-pub const Triangle = extern struct {
-    p1: PointFixed,
-    p2: PointFixed,
-    p3: PointFixed,
-};
-
-pub const Trapezoid = extern struct {
-    top: Fixed,
-    bottom: Fixed,
-    left: LineFixed,
-    right: LineFixed,
-};
-
-pub const Rectangle16 = extern struct {
-    x: i16,
-    y: i16,
-    width: u16,
-    height: u16,
-};
-
-pub const Box32 = extern struct {
-    x1: i32,
-    y1: i32,
-    x2: i32,
-    y2: i32,
-};
-
-pub const Indexed = extern struct {
-    color: bool,
-    rgba: [256]u32,
-    ent: [32768]u8,
-};
-
-test {
-    std.testing.refAllDecls(@This());
-}
diff --git a/src/font/sprite.zig b/src/font/sprite.zig
index 00462c205..d71e777bf 100644
--- a/src/font/sprite.zig
+++ b/src/font/sprite.zig
@@ -3,6 +3,7 @@ const canvas = @import("sprite/canvas.zig");
 pub const Face = @import("sprite/Face.zig");
 
 pub const Box = canvas.Box;
+pub const Point = canvas.Point;
 pub const Canvas = canvas.Canvas;
 pub const Color = canvas.Color;
 
diff --git a/src/font/sprite/Box.zig b/src/font/sprite/Box.zig
index 109ff2353..4343f0be1 100644
--- a/src/font/sprite/Box.zig
+++ b/src/font/sprite/Box.zig
@@ -23,6 +23,8 @@ const Allocator = std.mem.Allocator;
 const font = @import("../main.zig");
 const Sprite = @import("../sprite.zig").Sprite;
 
+const z2d = @import("z2d");
+
 const log = std.log.scoped(.box_font);
 
 /// The cell width and height because the boxes are fit perfectly
@@ -101,19 +103,54 @@ const Alignment = struct {
     const lower_left: Alignment = .{ .vertical = .bottom, .horizontal = .left };
     const lower_right: Alignment = .{ .vertical = .bottom, .horizontal = .right };
 
+    const center: Alignment = .{};
+
+    const upper_center = upper;
+    const lower_center = lower;
+    const middle_left = left;
+    const middle_right = right;
+    const middle_center: Alignment = center;
+
     const top = upper;
     const bottom = lower;
+    const center_top = top;
+    const center_bottom = bottom;
+
+    const top_left = upper_left;
+    const top_right = upper_right;
+    const bottom_left = lower_left;
+    const bottom_right = lower_right;
+};
+
+const Corner = enum {
+    tl,
+    tr,
+    bl,
+    br,
 };
 
 // Utility names for common fractions
 const one_eighth: f64 = 0.125;
 const one_quarter: f64 = 0.25;
+const one_third: f64 = (1.0 / 3.0);
 const three_eighths: f64 = 0.375;
 const half: f64 = 0.5;
 const five_eighths: f64 = 0.625;
+const two_thirds: f64 = (2.0 / 3.0);
 const three_quarters: f64 = 0.75;
 const seven_eighths: f64 = 0.875;
 
+/// Named 8-bit shade levels; the enum is non-exhaustive, so any other u8 value is also a valid Shade.
+const Shade = enum(u8) {
+    off = 0x00,
+    light = 0x40,
+    medium = 0x80,
+    dark = 0xc0,
+    on = 0xff,
+
+    _, // non-exhaustive marker: values between the named levels are allowed
+};
+
 pub fn renderGlyph(
     self: Box,
     alloc: Allocator,
@@ -160,6 +197,7 @@ pub fn unadjustedCodepoint(cp: u32) bool {
 }
 
 fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void {
+    _ = alloc;
     switch (cp) {
         // '─'
         0x2500 => self.draw_lines(canvas, .{ .left = .light, .right = .light }),
@@ -385,8 +423,15 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void
         0x256b => self.draw_lines(canvas, .{ .up = .double, .down = .double, .left = .light, .right = .light }),
         // '╬'
         0x256c => self.draw_lines(canvas, .{ .up = .double, .down = .double, .left = .double, .right = .double }),
-        0x256d...0x2570 => try self.draw_light_arc(alloc, canvas, cp),
+        // '╭'
+        0x256d => try self.draw_light_arc(canvas, .br),
+        // '╮'
+        0x256e => try self.draw_light_arc(canvas, .bl),
+        // '╯'
+        0x256f => try self.draw_light_arc(canvas, .tl),
 
+        // '╰'
+        0x2570 => try self.draw_light_arc(canvas, .tr),
         // '╱'
         0x2571 => self.draw_light_diagonal_upper_right_to_lower_left(canvas),
         // '╲'
@@ -500,7 +545,7 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void
         0x1fb52...0x1fb56,
         0x1fb5d...0x1fb61,
         0x1fb68...0x1fb6b,
-        => try self.draw_wedge_triangle_inverted(alloc, canvas, cp),
+        => try self.draw_wedge_triangle_inverted(canvas, cp),
 
         // '🭆'
         0x1fb46,
@@ -599,6 +644,294 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void
         0x1fb8a => self.draw_block(canvas, Alignment.right, three_quarters, 1),
         // '🮋' RIGHT SEVEN EIGHTHS BLOCK
         0x1fb8b => self.draw_block(canvas, Alignment.right, seven_eighths, 1),
+        // '🮌'
+        0x1fb8c => self.draw_block_shade(canvas, Alignment.left, half, 1, .medium),
+        // '🮍'
+        0x1fb8d => self.draw_block_shade(canvas, Alignment.right, half, 1, .medium),
+        // '🮎'
+        0x1fb8e => self.draw_block_shade(canvas, Alignment.upper, 1, half, .medium),
+        // '🮏'
+        0x1fb8f => self.draw_block_shade(canvas, Alignment.lower, 1, half, .medium),
+
+        // '🮐'
+        0x1fb90 => self.draw_medium_shade(canvas),
+        // '🮑'
+        0x1fb91 => {
+            self.draw_medium_shade(canvas);
+            self.draw_block(canvas, Alignment.upper, 1, half);
+        },
+        // '🮒'
+        0x1fb92 => {
+            self.draw_medium_shade(canvas);
+            self.draw_block(canvas, Alignment.lower, 1, half);
+        },
+        // '🮔'
+        0x1fb94 => {
+            self.draw_medium_shade(canvas);
+            self.draw_block(canvas, Alignment.right, half, 1);
+        },
+        // '🮕'
+        0x1fb95 => self.draw_checkerboard_fill(canvas, 0),
+        // '🮖'
+        0x1fb96 => self.draw_checkerboard_fill(canvas, 1),
+        // '🮗'
+        0x1fb97 => {
+            self.draw_horizontal_one_eighth_block_n(canvas, 2);
+            self.draw_horizontal_one_eighth_block_n(canvas, 3);
+            self.draw_horizontal_one_eighth_block_n(canvas, 6);
+            self.draw_horizontal_one_eighth_block_n(canvas, 7);
+        },
+        // '🮘'
+        0x1fb98 => self.draw_upper_left_to_lower_right_fill(canvas),
+        // '🮙'
+        0x1fb99 => self.draw_upper_right_to_lower_left_fill(canvas),
+        // '🮜'
+        0x1fb9c => self.draw_corner_triangle_shade(canvas, .tl, .medium),
+        // '🮝'
+        0x1fb9d => self.draw_corner_triangle_shade(canvas, .tr, .medium),
+        // '🮞'
+        0x1fb9e => self.draw_corner_triangle_shade(canvas, .br, .medium),
+        // '🮟'
+        0x1fb9f => self.draw_corner_triangle_shade(canvas, .bl, .medium),
+
+        // '🮠'
+        0x1fba0 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true }),
+        // '🮡'
+        0x1fba1 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true }),
+        // '🮢'
+        0x1fba2 => self.draw_corner_diagonal_lines(canvas, .{ .bl = true }),
+        // '🮣'
+        0x1fba3 => self.draw_corner_diagonal_lines(canvas, .{ .br = true }),
+        // '🮤'
+        0x1fba4 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .bl = true }),
+        // '🮥'
+        0x1fba5 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .br = true }),
+        // '🮦'
+        0x1fba6 => self.draw_corner_diagonal_lines(canvas, .{ .bl = true, .br = true }),
+        // '🮧'
+        0x1fba7 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true }),
+        // '🮨'
+        0x1fba8 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .br = true }),
+        // '🮩'
+        0x1fba9 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .bl = true }),
+        // '🮪'
+        0x1fbaa => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .bl = true, .br = true }),
+        // '🮫'
+        0x1fbab => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .bl = true, .br = true }),
+        // '🮬'
+        0x1fbac => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .br = true }),
+        // '🮭'
+        0x1fbad => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true }),
+        // '🮮'
+        0x1fbae => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true, .br = true }),
+        // '🮯'
+        0x1fbaf => self.draw_lines(canvas, .{ .up = .heavy, .down = .heavy, .left = .light, .right = .light }),
+
+        // '🮽'
+        0x1fbbd => {
+            self.draw_light_diagonal_cross(canvas);
+            canvas.invert();
+        },
+        // '🮾'
+        0x1fbbe => {
+            self.draw_corner_diagonal_lines(canvas, .{ .br = true });
+            canvas.invert();
+        },
+        // '🮿'
+        0x1fbbf => {
+            self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true, .br = true });
+            canvas.invert();
+        },
+
+        // '🯎'
+        0x1fbce => self.draw_block(canvas, Alignment.left, two_thirds, 1),
+        // '🯏'
+        0x1fbcf => self.draw_block(canvas, Alignment.left, one_third, 1),
+        // '🯐'
+        0x1fbd0 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.middle_right,
+            Alignment.lower_left,
+        ),
+        // '🯑'
+        0x1fbd1 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_right,
+            Alignment.middle_left,
+        ),
+        // '🯒'
+        0x1fbd2 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_left,
+            Alignment.middle_right,
+        ),
+        // '🯓'
+        0x1fbd3 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.middle_left,
+            Alignment.lower_right,
+        ),
+        // '🯔'
+        0x1fbd4 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_left,
+            Alignment.lower_center,
+        ),
+        // '🯕'
+        0x1fbd5 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_center,
+            Alignment.lower_right,
+        ),
+        // '🯖'
+        0x1fbd6 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_right,
+            Alignment.lower_center,
+        ),
+        // '🯗'
+        0x1fbd7 => self.draw_cell_diagonal(
+            canvas,
+            Alignment.upper_center,
+            Alignment.lower_left,
+        ),
+        // '🯘'
+        0x1fbd8 => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_left,
+                Alignment.middle_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_center,
+                Alignment.upper_right,
+            );
+        },
+        // '🯙'
+        0x1fbd9 => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_right,
+                Alignment.middle_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_center,
+                Alignment.lower_right,
+            );
+        },
+        // '🯚'
+        0x1fbda => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.lower_left,
+                Alignment.middle_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_center,
+                Alignment.lower_right,
+            );
+        },
+        // '🯛'
+        0x1fbdb => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_left,
+                Alignment.middle_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_center,
+                Alignment.lower_left,
+            );
+        },
+        // '🯜'
+        0x1fbdc => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_left,
+                Alignment.lower_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.lower_center,
+                Alignment.upper_right,
+            );
+        },
+        // '🯝'
+        0x1fbdd => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_right,
+                Alignment.middle_left,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_left,
+                Alignment.lower_right,
+            );
+        },
+        // '🯞'
+        0x1fbde => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.lower_left,
+                Alignment.upper_center,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_center,
+                Alignment.lower_right,
+            );
+        },
+        // '🯟'
+        0x1fbdf => {
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.upper_left,
+                Alignment.middle_right,
+            );
+            self.draw_cell_diagonal(
+                canvas,
+                Alignment.middle_right,
+                Alignment.lower_left,
+            );
+        },
+
+        // '🯠'
+        0x1fbe0 => self.draw_circle(canvas, Alignment.top, false),
+        // '🯡'
+        0x1fbe1 => self.draw_circle(canvas, Alignment.right, false),
+        // '🯢'
+        0x1fbe2 => self.draw_circle(canvas, Alignment.bottom, false),
+        // '🯣'
+        0x1fbe3 => self.draw_circle(canvas, Alignment.left, false),
+        // '🯤'
+        0x1fbe4 => self.draw_block(canvas, Alignment.upper_center, 0.5, 0.5),
+        // '🯥'
+        0x1fbe5 => self.draw_block(canvas, Alignment.lower_center, 0.5, 0.5),
+        // '🯦'
+        0x1fbe6 => self.draw_block(canvas, Alignment.middle_left, 0.5, 0.5),
+        // '🯧'
+        0x1fbe7 => self.draw_block(canvas, Alignment.middle_right, 0.5, 0.5),
+        // '🯨'
+        0x1fbe8 => self.draw_circle(canvas, Alignment.top, true),
+        // '🯩'
+        0x1fbe9 => self.draw_circle(canvas, Alignment.right, true),
+        // '🯪'
+        0x1fbea => self.draw_circle(canvas, Alignment.bottom, true),
+        // '🯫'
+        0x1fbeb => self.draw_circle(canvas, Alignment.left, true),
+        // '🯬'
+        0x1fbec => self.draw_circle(canvas, Alignment.top_right, true),
+        // '🯭'
+        0x1fbed => self.draw_circle(canvas, Alignment.bottom_left, true),
+        // '🯮'
+        0x1fbee => self.draw_circle(canvas, Alignment.bottom_right, true),
+        // '🯯'
+        0x1fbef => self.draw_circle(canvas, Alignment.top_left, true),
 
         // Not official box characters but special characters we hide
         // in the high bits of a unicode codepoint.
@@ -610,7 +943,11 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void
     }
 }
 
-fn draw_lines(self: Box, canvas: *font.sprite.Canvas, comptime lines: Lines) void {
+fn draw_lines(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime lines: Lines,
+) void {
     const light_px = Thickness.light.height(self.thickness);
     const heavy_px = Thickness.heavy.height(self.thickness);
 
@@ -858,63 +1195,20 @@ fn draw_heavy_double_dash_vertical(self: Box, canvas: *font.sprite.Canvas) void
 }
 
 fn draw_light_diagonal_upper_right_to_lower_left(self: Box, canvas: *font.sprite.Canvas) void {
-    const thick_px = Thickness.light.height(self.thickness);
-    canvas.trapezoid(.{
-        .top = 0,
-        .bottom = @as(i32, @intCast(self.height)),
-        .left = .{
-            .p1 = .{
-                .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) - @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = 0,
-            },
-
-            .p2 = .{
-                .x = @as(i32, @intFromFloat(0 - @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = @as(i32, @intCast(self.height)),
-            },
-        },
-        .right = .{
-            .p1 = .{
-                .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) + @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = 0,
-            },
-
-            .p2 = .{
-                .x = @as(i32, @intFromFloat(0 + @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = @as(i32, @intCast(self.height)),
-            },
-        },
-    });
+    canvas.line(.{
+        .p0 = .{ .x = @floatFromInt(self.width), .y = 0 },
+        .p1 = .{ .x = 0, .y = @floatFromInt(self.height) },
+    }, @floatFromInt(Thickness.light.height(self.thickness)), .on) catch {};
 }
 
 fn draw_light_diagonal_upper_left_to_lower_right(self: Box, canvas: *font.sprite.Canvas) void {
-    const thick_px = Thickness.light.height(self.thickness);
-    canvas.trapezoid(.{
-        .top = 0,
-        .bottom = @as(i32, @intCast(self.height)),
-        .left = .{
-            .p1 = .{
-                .x = @as(i32, @intFromFloat(0 - @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = 0,
-            },
-
-            .p2 = .{
-                .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) - @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = @as(i32, @intCast(self.height)),
-            },
+    canvas.line(.{
+        .p0 = .{ .x = 0, .y = 0 },
+        .p1 = .{
+            .x = @floatFromInt(self.width),
+            .y = @floatFromInt(self.height),
         },
-        .right = .{
-            .p1 = .{
-                .x = @as(i32, @intFromFloat(0 + @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = 0,
-            },
-
-            .p2 = .{
-                .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) + @as(f64, @floatFromInt(thick_px)) / 2)),
-                .y = @as(i32, @intCast(self.height)),
-            },
-        },
-    });
+    }, @floatFromInt(Thickness.light.height(self.thickness)), .on) catch {};
 }
 
 fn draw_light_diagonal_cross(self: Box, canvas: *font.sprite.Canvas) void {
@@ -925,9 +1219,20 @@ fn draw_light_diagonal_cross(self: Box, canvas: *font.sprite.Canvas) void {
 fn draw_block(
     self: Box,
     canvas: *font.sprite.Canvas,
-    alignment: Alignment,
-    width: f64,
-    height: f64,
+    comptime alignment: Alignment,
+    comptime width: f64,
+    comptime height: f64,
+) void {
+    self.draw_block_shade(canvas, alignment, width, height, .on);
+}
+
+fn draw_block_shade(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime alignment: Alignment,
+    comptime width: f64,
+    comptime height: f64,
+    comptime shade: Shade,
 ) void {
     const float_width: f64 = @floatFromInt(self.width);
     const float_height: f64 = @floatFromInt(self.height);
@@ -947,11 +1252,31 @@ fn draw_block(
     };
 
     canvas.rect(.{
-        .x = @intCast(x),
-        .y = @intCast(y),
-        .width = w,
-        .height = h,
-    }, .on);
+        .x = @floatFromInt(x),
+        .y = @floatFromInt(y),
+        .width = @floatFromInt(w),
+        .height = @floatFromInt(h),
+    }, @as(font.sprite.Color, @enumFromInt(@intFromEnum(shade))));
+}
+
+fn draw_corner_triangle_shade(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime corner: Corner,
+    comptime shade: Shade,
+) void { // Fills the half-cell triangle that contains `corner` with `shade`.
+    const x0, const y0, const x1, const y1, const x2, const y2 = switch (corner) {
+        .tl => .{ 0, 0, 0, self.height, self.width, 0 }, // (0,0) (0,h) (w,0)
+        .tr => .{ 0, 0, self.width, self.height, self.width, 0 }, // (0,0) (w,h) (w,0)
+        .bl => .{ 0, 0, 0, self.height, self.width, self.height }, // (0,0) (0,h) (w,h)
+        .br => .{ 0, self.height, self.width, self.height, self.width, 0 }, // (0,h) (w,h) (w,0)
+    };
+
+    canvas.triangle(.{
+        .p0 = .{ .x = @floatFromInt(x0), .y = @floatFromInt(y0) },
+        .p1 = .{ .x = @floatFromInt(x1), .y = @floatFromInt(y1) },
+        .p2 = .{ .x = @floatFromInt(x2), .y = @floatFromInt(y2) },
+    }, @as(font.sprite.Color, @enumFromInt(@intFromEnum(shade)))) catch {}; // shade value is reused as the color; any error from triangle() is discarded
 }
 
 fn draw_full_block(self: Box, canvas: *font.sprite.Canvas) void {
@@ -964,25 +1289,220 @@ fn draw_vertical_one_eighth_block_n(self: Box, canvas: *font.sprite.Canvas, n: u
     self.rect(canvas, x, 0, x + w, self.height);
 }
 
-fn draw_pixman_shade(self: Box, canvas: *font.sprite.Canvas, v: u16) void {
+fn draw_checkerboard_fill(self: Box, canvas: *font.sprite.Canvas, parity: u1) void { // Fills alternating squares of a grid laid over the cell.
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+    const x_size: usize = 4; // the grid always has 4 columns
+    const y_size: usize = @intFromFloat(@round(4 * (float_height / float_width))); // row count: 4 scaled by height/width, rounded
+    for (0..x_size) |x| {
+        const x0 = (self.width * x) / x_size; // integer column edges; column x ends where column x + 1 starts
+        const x1 = (self.width * (x + 1)) / x_size;
+        for (0..y_size) |y| {
+            const y0 = (self.height * y) / y_size; // integer row edges, computed the same way
+            const y1 = (self.height * (y + 1)) / y_size;
+            if ((x + y) % 2 == parity) { // only squares whose column + row parity equals `parity` are filled
+                canvas.rect(.{
+                    .x = @floatFromInt(x0),
+                    .y = @floatFromInt(y0),
+                    .width = @floatFromInt(x1 -| x0), // saturating subtraction, so the size never wraps
+                    .height = @floatFromInt(y1 -| y0),
+                }, .on);
+            }
+        }
+    }
+}
+
+fn draw_upper_left_to_lower_right_fill(self: Box, canvas: *font.sprite.Canvas) void {
+    // Hatches the cell with parallel light-thickness lines running from the upper left to the lower right.
+    const thick_px = Thickness.light.height(self.thickness);
+    // Clamp to at least 1: a zero thickness would divide by zero, and a zero count would make the stride infinite and every x coordinate NaN.
+    const line_count = @max(1, self.width / @max(1, 2 * thick_px));
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+    const float_thick: f64 = @floatFromInt(thick_px);
+    const stride = @round(float_width / @as(f64, @floatFromInt(line_count))); // whole-pixel horizontal spacing between lines
+    for (0..line_count * 2 + 1) |_i| {
+        const i = @as(i32, @intCast(_i)) - @as(i32, @intCast(line_count)); // -line_count..=line_count, so lines starting left of the cell are drawn too
+        const top_x = @as(f64, @floatFromInt(i)) * stride;
+        const bottom_x = float_width + top_x; // each line advances one cell width over one cell height
+        canvas.line(.{
+            .p0 = .{ .x = top_x, .y = 0 },
+            .p1 = .{ .x = bottom_x, .y = float_height },
+        }, float_thick, .on) catch {}; // any error from line() is discarded
+    }
+}
+
+fn draw_upper_right_to_lower_left_fill(self: Box, canvas: *font.sprite.Canvas) void {
+    // Hatches the cell with parallel light-thickness lines running from the upper right to the lower left.
+    const thick_px = Thickness.light.height(self.thickness);
+    // Clamp to at least 1: a zero thickness would divide by zero, and a zero count would make the stride infinite and every x coordinate NaN.
+    const line_count = @max(1, self.width / @max(1, 2 * thick_px));
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+    const float_thick: f64 = @floatFromInt(thick_px);
+    const stride = @round(float_width / @as(f64, @floatFromInt(line_count))); // whole-pixel horizontal spacing between lines
+    for (0..line_count * 2 + 1) |_i| {
+        const i = @as(i32, @intCast(_i)) - @as(i32, @intCast(line_count)); // -line_count..=line_count, so lines ending left of the cell are drawn too
+        const bottom_x = @as(f64, @floatFromInt(i)) * stride;
+        const top_x = float_width + bottom_x; // each line retreats one cell width over one cell height
+        canvas.line(.{
+            .p0 = .{ .x = top_x, .y = 0 },
+            .p1 = .{ .x = bottom_x, .y = float_height },
+        }, float_thick, .on) catch {}; // any error from line() is discarded
+    }
+}
+
+fn draw_corner_diagonal_lines(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime corners: Quads,
+) void { // For each corner flagged in `corners`, strokes a light line cutting that corner off.
+    const thick_px = Thickness.light.height(self.thickness);
+
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+    const float_thick: f64 = @floatFromInt(thick_px);
+    const center_x: f64 = @floatFromInt(self.width / 2 + self.width % 2); // horizontal midpoint, rounded up to a whole pixel
+    const center_y: f64 = @floatFromInt(self.height / 2 + self.height % 2); // vertical midpoint, rounded up to a whole pixel
+
+    if (corners.tl) canvas.line(.{ // top edge midpoint to left edge midpoint
+        .p0 = .{ .x = center_x, .y = 0 },
+        .p1 = .{ .x = 0, .y = center_y },
+    }, float_thick, .on) catch {};
+
+    if (corners.tr) canvas.line(.{ // top edge midpoint to right edge midpoint
+        .p0 = .{ .x = center_x, .y = 0 },
+        .p1 = .{ .x = float_width, .y = center_y },
+    }, float_thick, .on) catch {};
+
+    if (corners.bl) canvas.line(.{ // bottom edge midpoint to left edge midpoint
+        .p0 = .{ .x = center_x, .y = float_height },
+        .p1 = .{ .x = 0, .y = center_y },
+    }, float_thick, .on) catch {};
+
+    if (corners.br) canvas.line(.{ // bottom edge midpoint to right edge midpoint
+        .p0 = .{ .x = center_x, .y = float_height },
+        .p1 = .{ .x = float_width, .y = center_y },
+    }, float_thick, .on) catch {}; // as above, any error from line() is discarded
+}
+
+fn draw_cell_diagonal(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime from: Alignment,
+    comptime to: Alignment,
+) void { // Draws a light line between the two cell anchor points named by `from` and `to`.
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+
+    const x0: f64 = switch (from.horizontal) { // start x: left edge, right edge, or exact (unrounded) center
+        .left => 0,
+        .right => float_width,
+        .center => float_width / 2,
+    };
+    const y0: f64 = switch (from.vertical) { // start y: top edge, bottom edge, or exact (unrounded) middle
+        .top => 0,
+        .bottom => float_height,
+        .middle => float_height / 2,
+    };
+    const x1: f64 = switch (to.horizontal) { // end x, resolved the same way
+        .left => 0,
+        .right => float_width,
+        .center => float_width / 2,
+    };
+    const y1: f64 = switch (to.vertical) { // end y, resolved the same way
+        .top => 0,
+        .bottom => float_height,
+        .middle => float_height / 2,
+    };
+
+    self.draw_line(
+        canvas,
+        .{ .x = x0, .y = y0 },
+        .{ .x = x1, .y = y1 },
+        .light,
+    ) catch {}; // any error from draw_line is discarded
+}
+
+fn draw_circle(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    comptime position: Alignment,
+    comptime filled: bool,
+) void { // Draws a circle centered on the cell point named by `position`: filled, or stroked at light thickness.
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+
+    const x: f64 = switch (position.horizontal) { // center x: left edge, right edge, or horizontal center
+        .left => 0,
+        .right => float_width,
+        .center => float_width / 2,
+    };
+    const y: f64 = switch (position.vertical) { // center y: top edge, bottom edge, or vertical middle
+        .top => 0,
+        .bottom => float_height,
+        .middle => float_height / 2,
+    };
+    const r: f64 = 0.5 * @min(float_width, float_height); // radius is half of the smaller cell dimension
+
+    var ctx: z2d.Context = .{
+        .surface = canvas.sfc,
+        .pattern = .{
+            .opaque_pattern = .{
+                .pixel = .{ .alpha8 = .{ .a = @intFromEnum(Shade.on) } }, // alpha 0xff, the `on` shade
+            },
+        },
+        .line_width = @floatFromInt(Thickness.light.height(self.thickness)),
+    };
+
+    var path = z2d.Path.init(canvas.alloc);
+    defer path.deinit();
+
+    if (filled) { // every z2d call below bails out with `catch return`, so a failure leaves the glyph partially drawn or empty
+        path.arc(x, y, r, 0, std.math.pi * 2, false, null) catch return;
+        path.close() catch return;
+        ctx.fill(canvas.alloc, path) catch return;
+    } else {
+        path.arc(x, y, r - ctx.line_width / 2, 0, std.math.pi * 2, false, null) catch return; // radius shrunk by half the line width; presumably keeps the stroke's outer edge at r (confirm against z2d stroke semantics)
+        path.close() catch return;
+        ctx.stroke(canvas.alloc, path) catch return;
+    }
+}
+
+fn draw_line(
+    self: Box,
+    canvas: *font.sprite.Canvas,
+    p0: font.sprite.Point,
+    p1: font.sprite.Point,
+    comptime thickness: Thickness,
+) !void { // Strokes a fully-on line from p0 to p1 whose width is `thickness` resolved against the box's base thickness.
+    try canvas.line(
+        .{ .p0 = p0, .p1 = p1 },
+        @floatFromInt(thickness.height(self.thickness)),
+        .on,
+    ); // failures propagate through the declared `!void` so each caller decides whether to ignore them
+}
+
+fn draw_shade(self: Box, canvas: *font.sprite.Canvas, v: u16) void {
     canvas.rect((font.sprite.Box{
-        .x1 = 0,
-        .y1 = 0,
-        .x2 = @as(i32, @intCast(self.width)),
-        .y2 = @as(i32, @intCast(self.height)),
+        .p0 = .{ .x = 0, .y = 0 },
+        .p1 = .{
+            .x = @floatFromInt(self.width),
+            .y = @floatFromInt(self.height),
+        },
     }).rect(), @as(font.sprite.Color, @enumFromInt(v)));
 }
 
 fn draw_light_shade(self: Box, canvas: *font.sprite.Canvas) void {
-    self.draw_pixman_shade(canvas, 0x40);
+    self.draw_shade(canvas, 0x40);
 }
 
 fn draw_medium_shade(self: Box, canvas: *font.sprite.Canvas) void {
-    self.draw_pixman_shade(canvas, 0x80);
+    self.draw_shade(canvas, 0x80);
 }
 
 fn draw_dark_shade(self: Box, canvas: *font.sprite.Canvas) void {
-    self.draw_pixman_shade(canvas, 0xc0);
+    self.draw_shade(canvas, 0xc0);
 }
 
 fn draw_horizontal_one_eighth_block_n(self: Box, canvas: *font.sprite.Canvas, n: u32) void {
@@ -1495,29 +2015,20 @@ fn draw_wedge_triangle(self: Box, canvas: *font.sprite.Canvas, cp: u32) !void {
         else => unreachable,
     }
 
-    canvas.triangle(.{
-        .p1 = .{ .x = @as(i32, @intCast(p1_x)), .y = @as(i32, @intCast(p1_y)) },
-        .p2 = .{ .x = @as(i32, @intCast(p2_x)), .y = @as(i32, @intCast(p2_y)) },
-        .p3 = .{ .x = @as(i32, @intCast(p3_x)), .y = @as(i32, @intCast(p3_y)) },
+    try canvas.triangle(.{
+        .p0 = .{ .x = @floatFromInt(p1_x), .y = @floatFromInt(p1_y) },
+        .p1 = .{ .x = @floatFromInt(p2_x), .y = @floatFromInt(p2_y) },
+        .p2 = .{ .x = @floatFromInt(p3_x), .y = @floatFromInt(p3_y) },
     }, .on);
 }
 
 fn draw_wedge_triangle_inverted(
     self: Box,
-    alloc: Allocator,
     canvas: *font.sprite.Canvas,
     cp: u32,
 ) !void {
     try self.draw_wedge_triangle(canvas, cp);
-
-    var src = try font.sprite.Canvas.init(alloc, self.width, self.height);
-    src.rect(.{ .x = 0, .y = 0, .width = self.width, .height = self.height }, .on);
-    defer src.deinit(alloc);
-    canvas.composite(
-        .source_out,
-        &src,
-        .{ .x = 0, .y = 0, .width = self.width, .height = self.height },
-    );
+    canvas.invert();
 }
 
 fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32) !void {
@@ -1526,17 +2037,19 @@ fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32)
     const y_thirds = self.yThirds();
     const box: font.sprite.Box = switch (cp) {
         0x1fb46, 0x1fb51 => .{
-            .x1 = 0,
-            .y1 = @as(i32, @intCast(y_thirds[1])),
-            .x2 = @as(i32, @intCast(self.width)),
-            .y2 = @as(i32, @intCast(self.height)),
+            .p0 = .{ .x = 0, .y = @floatFromInt(y_thirds[1]) },
+            .p1 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(self.height),
+            },
         },
 
         0x1fb5c, 0x1fb67 => .{
-            .x1 = 0,
-            .y1 = 0,
-            .x2 = @as(i32, @intCast(self.width)),
-            .y2 = @as(i32, @intCast(y_thirds[0])),
+            .p0 = .{ .x = 0, .y = 0 },
+            .p1 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(y_thirds[0]),
+            },
         },
 
         else => unreachable,
@@ -1547,246 +2060,106 @@ fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32)
 
 fn draw_light_arc(
     self: Box,
-    alloc: Allocator,
     canvas: *font.sprite.Canvas,
-    cp: u32,
+    comptime corner: Corner,
 ) !void {
-    const supersample = 4;
-    const height = self.height * supersample;
-    const width = self.width * supersample;
+    const thick_px = Thickness.light.height(self.thickness);
+    const float_width: f64 = @floatFromInt(self.width);
+    const float_height: f64 = @floatFromInt(self.height);
+    const float_thick: f64 = @floatFromInt(thick_px);
+    const center_x: f64 = @floatFromInt(self.width / 2 + self.width % 2);
+    const center_y: f64 = @floatFromInt(self.height / 2 + self.height % 2);
 
-    // Allocate our supersample sized canvas
-    var ss_data = try alloc.alloc(u8, height * width);
-    defer alloc.free(ss_data);
-    @memset(ss_data, 0);
+    const r = @min(float_width, float_height) / 2;
 
-    const height_pixels = self.height;
-    const width_pixels = self.width;
-    const thick_pixels = Thickness.light.height(self.thickness);
-    const thick = thick_pixels * supersample;
+    // Fraction of r away from the center at which to place the middle control points.
+    const s: f64 = 0.25;
 
-    const circle_inner_edge = (@min(width_pixels, height_pixels) -| thick_pixels) / 2;
-
-    // We want to draw the quartercircle by filling small circles (with r =
-    // thickness/2.) whose centers are on its edge. This means to get the
-    // radius of the quartercircle, we add the exact half thickness to the
-    // radius of the inner circle.
-    var c_r: f64 = @as(f64, @floatFromInt(circle_inner_edge)) + @as(f64, @floatFromInt(thick_pixels)) / 2;
-
-    // We need to draw short lines from the end of the quartercircle to the
-    // box-edges, store one endpoint (the other is the edge of the
-    // quartercircle) in these vars.
-    var vert_to: u32 = 0;
-    var hor_to: u32 = 0;
-
-    // Coordinates of the circle-center.
-    var c_x: u32 = 0;
-    var c_y: u32 = 0;
-
-    // For a given y there are up to two solutions for the circle-equation.
-    // Set to -1 for the left, and 1 for the right hemisphere.
-    var circle_hemisphere: i32 = 0;
-
-    // The quarter circle only has to be evaluated for a small range of
-    // y-values.
-    var y_min: u32 = 0;
-    var y_max: u32 = 0;
-
-    switch (cp) {
-        '╭' => {
-            // Don't use supersampled coordinates yet, we want to align actual
-            // pixels.
-            //
-            // pixel-coordinates of the lower edge of the right line and the
-            // right edge of the bottom line.
-            const right_bottom_edge = (height_pixels + thick_pixels) / 2;
-            const bottom_right_edge = (width_pixels + thick_pixels) / 2;
-
-            // find coordinates of circle-center.
-            c_y = right_bottom_edge + circle_inner_edge;
-            c_x = bottom_right_edge + circle_inner_edge;
-
-            // we want to render the left, not the right hemisphere of the circle.
-            circle_hemisphere = -1;
-
-            // don't evaluate beyond c_y, the vertical line is drawn there.
-            y_min = 0;
-            y_max = c_y;
-
-            // the vertical line should extend to the bottom of the box, the
-            // horizontal to the right.
-            vert_to = height_pixels;
-            hor_to = width_pixels;
+    var ctx: z2d.Context = .{
+        .surface = canvas.sfc,
+        .pattern = .{
+            .opaque_pattern = .{
+                .pixel = .{ .alpha8 = .{ .a = @intFromEnum(Shade.on) } },
+            },
         },
-        '╮' => {
-            const left_bottom_edge = (height_pixels + thick_pixels) / 2;
-            const bottom_left_edge = (width_pixels -| thick_pixels) / 2;
+        .line_width = float_thick,
+        .line_cap_mode = .round,
+    };
 
-            c_y = left_bottom_edge + circle_inner_edge;
-            c_x = bottom_left_edge -| circle_inner_edge;
+    var path = z2d.Path.init(canvas.alloc);
+    defer path.deinit();
 
-            circle_hemisphere = 1;
-
-            y_min = 0;
-            y_max = c_y;
-
-            vert_to = height_pixels;
-            hor_to = 0;
-        },
-        '╰' => {
-            const right_top_edge = (height_pixels -| thick_pixels) / 2;
-            const top_right_edge = (width_pixels + thick_pixels) / 2;
-
-            c_y = right_top_edge -| circle_inner_edge;
-            c_x = top_right_edge + circle_inner_edge;
-
-            circle_hemisphere = -1;
-
-            y_min = c_y;
-            y_max = height_pixels;
-
-            vert_to = 0;
-            hor_to = width_pixels;
-        },
-        '╯' => {
-            const left_top_edge = (height_pixels -| thick_pixels) / 2;
-            const top_left_edge = (width_pixels -| thick_pixels) / 2;
-
-            c_y = left_top_edge -| circle_inner_edge;
-            c_x = top_left_edge -| circle_inner_edge;
-
-            circle_hemisphere = 1;
-
-            y_min = c_y;
-            y_max = height_pixels;
-
-            vert_to = 0;
-            hor_to = 0;
-        },
-
-        else => {},
-    }
-
-    // store for horizontal+vertical line.
-    const c_x_pixels = c_x;
-    const c_y_pixels = c_y;
-
-    // Bring coordinates from pixel-grid to supersampled grid.
-    c_r *= supersample;
-    c_x *= supersample;
-    c_y *= supersample;
-
-    y_min *= supersample;
-    y_max *= supersample;
-
-    const c_r2 = c_r * c_r;
-
-    // To prevent gaps in the circle, each pixel is sampled multiple times.
-    // As the quartercircle ends (vertically) in the middle of a pixel, an
-    // uneven number helps hit that exactly.
-    {
-        var i: f64 = @as(f64, @floatFromInt(y_min)) * 16;
-        while (i <= @as(f64, @floatFromInt(y_max)) * 16) : (i += 1) {
-            const y = i / 16;
-            const x = x: {
-                // circle_hemisphere * sqrt(c_r2 - (y - c_y) * (y - c_y)) + c_x;
-                const hemi = @as(f64, @floatFromInt(circle_hemisphere));
-                const y_part = y - @as(f64, @floatFromInt(c_y));
-                const y_squared = y_part * y_part;
-                const sqrt = @sqrt(c_r2 - y_squared);
-                const f_c_x = @as(f64, @floatFromInt(c_x));
-
-                // We need to detect overflows and just skip this i
-                const a = hemi * sqrt;
-                const b = a + f_c_x;
-
-                // If the float math didn't work, ignore.
-                if (std.math.isNan(b)) continue;
-
-                break :x b;
-            };
-
-            const row = @as(i32, @intFromFloat(@round(y)));
-            const col = @as(i32, @intFromFloat(@round(x)));
-            if (col < 0) continue;
-
-            // rectangle big enough to fit entire circle with radius thick/2.
-            const row1 = row - @as(i32, @intCast(thick / 2 + 1));
-            const row2 = row + @as(i32, @intCast(thick / 2 + 1));
-            const col1 = col - @as(i32, @intCast(thick / 2 + 1));
-            const col2 = col + @as(i32, @intCast(thick / 2 + 1));
-
-            const row_start = @min(row1, row2);
-            const row_end = @max(row1, row2);
-            const col_start = @min(col1, col2);
-            const col_end = @max(col1, col2);
-
-            assert(row_end > row_start);
-            assert(col_end > col_start);
-
-            // draw circle with radius thick/2 around x,y.
-            // this is accomplished by rejecting pixels where the distance from
-            // their center to x,y is greater than thick/2.
-            var r: i32 = @max(row_start, 0);
-            const r_end = @max(@min(row_end, @as(i32, @intCast(height))), 0);
-            while (r < r_end) : (r += 1) {
-                const r_midpoint = @as(f64, @floatFromInt(r)) + 0.5;
-
-                var c: i32 = @max(col_start, 0);
-                const c_end = @max(@min(col_end, @as(i32, @intCast(width))), 0);
-                while (c < c_end) : (c += 1) {
-                    const c_midpoint = @as(f64, @floatFromInt(c)) + 0.5;
-
-                    // vector from point on quartercircle to midpoint of the current pixel.
-                    const center_midpoint_x = c_midpoint - x;
-                    const center_midpoint_y = r_midpoint - y;
-
-                    // distance from current point to circle-center.
-                    const dist = @sqrt(center_midpoint_x * center_midpoint_x + center_midpoint_y * center_midpoint_y);
-                    // skip if midpoint of pixel is outside the circle.
-                    if (dist > @as(f64, @floatFromInt(thick)) / 2) continue;
-
-                    // Set our pixel
-                    const idx = @as(usize, @intCast(r * @as(i32, @intCast(width)) + c));
-                    ss_data[idx] = 0xFF;
-                }
+    switch (corner) {
+        .tl => {
+            path.moveTo(center_x, 0) catch return;
+            if (self.height > self.width) {
+                path.lineTo(center_x, center_y - r) catch return;
             }
-        }
-    }
-
-    // Downsample
-    {
-        var r: u32 = 0;
-        while (r < self.height) : (r += 1) {
-            var c: u32 = 0;
-            while (c < self.width) : (c += 1) {
-                var total: u32 = 0;
-                var i: usize = 0;
-                while (i < supersample) : (i += 1) {
-                    var j: usize = 0;
-                    while (j < supersample) : (j += 1) {
-                        const idx = (r * supersample + i) * width + (c * supersample + j);
-                        total += ss_data[idx];
-                    }
-                }
-
-                const average = @as(u8, @intCast(@min(total / (supersample * supersample), 0xff)));
-                canvas.rect(
-                    .{
-                        .x = @as(i32, @intCast(c)),
-                        .y = @as(i32, @intCast(r)),
-                        .width = 1,
-                        .height = 1,
-                    },
-                    @as(font.sprite.Color, @enumFromInt(average)),
-                );
+            path.curveTo(
+                center_x,
+                center_y - s * r,
+                center_x - s * r,
+                center_y,
+                center_x - r,
+                center_y,
+            ) catch return;
+            if (self.width > self.height) {
+                path.lineTo(0, center_y) catch return;
             }
-        }
+        },
+        .tr => {
+            path.moveTo(center_x, 0) catch return;
+            if (self.height > self.width) {
+                path.lineTo(center_x, center_y - r) catch return;
+            }
+            path.curveTo(
+                center_x,
+                center_y - s * r,
+                center_x + s * r,
+                center_y,
+                center_x + r,
+                center_y,
+            ) catch return;
+            if (self.width > self.height) {
+                path.lineTo(float_width, center_y) catch return;
+            }
+        },
+        .bl => {
+            path.moveTo(center_x, float_height) catch return;
+            if (self.height > self.width) {
+                path.lineTo(center_x, center_y + r) catch return;
+            }
+            path.curveTo(
+                center_x,
+                center_y + s * r,
+                center_x - s * r,
+                center_y,
+                center_x - r,
+                center_y,
+            ) catch return;
+            if (self.width > self.height) {
+                path.lineTo(0, center_y) catch return;
+            }
+        },
+        .br => {
+            path.moveTo(center_x, float_height) catch return;
+            if (self.height > self.width) {
+                path.lineTo(center_x, center_y + r) catch return;
+            }
+            path.curveTo(
+                center_x,
+                center_y + s * r,
+                center_x + s * r,
+                center_y,
+                center_x + r,
+                center_y,
+            ) catch return;
+            if (self.width > self.height) {
+                path.lineTo(float_width, center_y) catch return;
+            }
+        },
     }
-
-    // draw vertical/horizontal lines from quartercircle-edge to box-edge.
-    self.vline(canvas, @min(c_y_pixels, vert_to), @max(c_y_pixels, vert_to), (width_pixels - thick_pixels) / 2, thick_pixels);
-    self.hline(canvas, @min(c_x_pixels, hor_to), @max(c_x_pixels, hor_to), (height_pixels - thick_pixels) / 2, thick_pixels);
+    ctx.stroke(canvas.alloc, path) catch return;
 }
 
 fn draw_dash_horizontal(
@@ -1978,12 +2351,13 @@ fn vline(
     x: u32,
     thickness_px: u32,
 ) void {
-    canvas.rect((font.sprite.Box{
-        .x1 = @as(i32, @intCast(@min(@max(x, 0), self.width))),
-        .x2 = @as(i32, @intCast(@min(@max(x + thickness_px, 0), self.width))),
-        .y1 = @as(i32, @intCast(@min(@max(y1, 0), self.height))),
-        .y2 = @as(i32, @intCast(@min(@max(y2, 0), self.height))),
-    }).rect(), .on);
+    canvas.rect((font.sprite.Box{ .p0 = .{
+        .x = @floatFromInt(@min(@max(x, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y1, 0), self.height)),
+    }, .p1 = .{
+        .x = @floatFromInt(@min(@max(x + thickness_px, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y2, 0), self.height)),
+    } }).rect(), .on);
 }
 
 fn hline(
@@ -1994,12 +2368,13 @@ fn hline(
     y: u32,
     thickness_px: u32,
 ) void {
-    canvas.rect((font.sprite.Box{
-        .x1 = @as(i32, @intCast(@min(@max(x1, 0), self.width))),
-        .x2 = @as(i32, @intCast(@min(@max(x2, 0), self.width))),
-        .y1 = @as(i32, @intCast(@min(@max(y, 0), self.height))),
-        .y2 = @as(i32, @intCast(@min(@max(y + thickness_px, 0), self.height))),
-    }).rect(), .on);
+    canvas.rect((font.sprite.Box{ .p0 = .{
+        .x = @floatFromInt(@min(@max(x1, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y, 0), self.height)),
+    }, .p1 = .{
+        .x = @floatFromInt(@min(@max(x2, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y + thickness_px, 0), self.height)),
+    } }).rect(), .on);
 }
 
 fn rect(
@@ -2010,12 +2385,13 @@ fn rect(
     x2: u32,
     y2: u32,
 ) void {
-    canvas.rect((font.sprite.Box{
-        .x1 = @as(i32, @intCast(@min(@max(x1, 0), self.width))),
-        .y1 = @as(i32, @intCast(@min(@max(y1, 0), self.height))),
-        .x2 = @as(i32, @intCast(@min(@max(x2, 0), self.width))),
-        .y2 = @as(i32, @intCast(@min(@max(y2, 0), self.height))),
-    }).rect(), .on);
+    canvas.rect((font.sprite.Box{ .p0 = .{
+        .x = @floatFromInt(@min(@max(x1, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y1, 0), self.height)),
+    }, .p1 = .{
+        .x = @floatFromInt(@min(@max(x2, 0), self.width)),
+        .y = @floatFromInt(@min(@max(y2, 0), self.height)),
+    } }).rect(), .on);
 }
 
 test "all" {
@@ -2073,26 +2449,50 @@ test "render all sprites" {
 
     // Symbols for Legacy Computing.
     cp = 0x1fb00;
-    while (cp <= 0x1fb9b) : (cp += 1) {
+    while (cp <= 0x1fbef) : (cp += 1) {
         switch (cp) {
-            0x1FB00...0x1FB3B,
-            0x1FB3C...0x1FB40,
-            0x1FB47...0x1FB4B,
-            0x1FB57...0x1FB5B,
-            0x1FB62...0x1FB66,
-            0x1FB6C...0x1FB6F,
-            0x1FB41...0x1FB45,
-            0x1FB4C...0x1FB50,
-            0x1FB52...0x1FB56,
-            0x1FB5D...0x1FB61,
-            0x1FB68...0x1FB6B,
-            0x1FB70...0x1FB8B,
-            0x1FB46,
-            0x1FB51,
-            0x1FB5C,
-            0x1FB67,
-            0x1FB9A,
-            0x1FB9B,
+            // (Block Mosaics / "Sextants")
+            // 🬀 🬁 🬂 🬃 🬄 🬅 🬆 🬇 🬈 🬉 🬊 🬋 🬌 🬍 🬎 🬏 🬐 🬑 🬒 🬓 🬔 🬕 🬖 🬗 🬘 🬙 🬚 🬛 🬜 🬝 🬞 🬟 🬠
+            // 🬡 🬢 🬣 🬤 🬥 🬦 🬧 🬨 🬩 🬪 🬫 🬬 🬭 🬮 🬯 🬰 🬱 🬲 🬳 🬴 🬵 🬶 🬷 🬸 🬹 🬺 🬻
+            // (Smooth Mosaics)
+            // 🬼 🬽 🬾 🬿 🭀 🭁 🭂 🭃 🭄 🭅 🭆
+            // 🭇 🭈 🭉 🭊 🭋 🭌 🭍 🭎 🭏 🭐 🭑
+            // 🭒 🭓 🭔 🭕 🭖 🭗 🭘 🭙 🭚 🭛 🭜
+            // 🭝 🭞 🭟 🭠 🭡 🭢 🭣 🭤 🭥 🭦 🭧
+            // 🭨 🭩 🭪 🭫 🭬 🭭 🭮 🭯
+            // (Block Elements)
+            // 🭰 🭱 🭲 🭳 🭴 🭵 🭶 🭷 🭸 🭹 🭺 🭻
+            // 🭼 🭽 🭾 🭿 🮀 🮁
+            // 🮂 🮃 🮄 🮅 🮆
+            // 🮇 🮈 🮉 🮊 🮋
+            // (Rectangular Shade Characters)
+            // 🮌 🮍 🮎 🮏 🮐 🮑 🮒
+            0x1FB00...0x1FB92,
+            // (Rectangular Shade Characters)
+            // 🮔
+            // (Fill Characters)
+            // 🮕 🮖 🮗
+            // (Diagonal Fill Characters)
+            // 🮘 🮙
+            // (Smooth Mosaics)
+            // 🮚 🮛
+            // (Triangular Shade Characters)
+            // 🮜 🮝 🮞 🮟
+            // (Character Cell Diagonals)
+            // 🮠 🮡 🮢 🮣 🮤 🮥 🮦 🮧 🮨 🮩 🮪 🮫 🮬 🮭 🮮
+            // (Light Solid Line With Stroke)
+            // 🮯
+            0x1FB94...0x1FBAF,
+            // (Negative Terminal Characters)
+            // 🮽 🮾 🮿
+            0x1FBBD...0x1FBBF,
+            // (Block Elements)
+            // 🯎 🯏
+            // (Character Cell Diagonals)
+            // 🯐 🯑 🯒 🯓 🯔 🯕 🯖 🯗 🯘 🯙 🯚 🯛 🯜 🯝 🯞 🯟
+            // (Geometric Shapes)
+            // 🯠 🯡 🯢 🯣 🯤 🯥 🯦 🯧 🯨 🯩 🯪 🯫 🯬 🯭 🯮 🯯
+            0x1FBCE...0x1FBEF,
             => _ = try face.renderGlyph(
                 alloc,
                 &atlas_grayscale,
diff --git a/src/font/sprite/Face.zig b/src/font/sprite/Face.zig
index adbe9bece..f183192dc 100644
--- a/src/font/sprite/Face.zig
+++ b/src/font/sprite/Face.zig
@@ -188,29 +188,65 @@ const Kind = enum {
                 => .box,
             },
 
-            // Box fonts
-            0x2500...0x257F, // "Box Drawing" block
-            0x2580...0x259F, // "Block Elements" block
-            0x2800...0x28FF, // "Braille" block
+            // == Box fonts ==
 
-            0x1FB00...0x1FB3B, // "Symbols for Legacy Computing" block
-            0x1FB3C...0x1FB40,
-            0x1FB47...0x1FB4B,
-            0x1FB57...0x1FB5B,
-            0x1FB62...0x1FB66,
-            0x1FB6C...0x1FB6F,
-            0x1FB41...0x1FB45,
-            0x1FB4C...0x1FB50,
-            0x1FB52...0x1FB56,
-            0x1FB5D...0x1FB61,
-            0x1FB68...0x1FB6B,
-            0x1FB70...0x1FB8B,
-            0x1FB46,
-            0x1FB51,
-            0x1FB5C,
-            0x1FB67,
-            0x1FB9A,
-            0x1FB9B,
+            // "Box Drawing" block
+            // ─ ━ │ ┃ ┄ ┅ ┆ ┇ ┈ ┉ ┊ ┋ ┌ ┍ ┎ ┏ ┐ ┑ ┒ ┓ └ ┕ ┖ ┗ ┘ ┙ ┚ ┛ ├ ┝ ┞ ┟ ┠
+            // ┡ ┢ ┣ ┤ ┥ ┦ ┧ ┨ ┩ ┪ ┫ ┬ ┭ ┮ ┯ ┰ ┱ ┲ ┳ ┴ ┵ ┶ ┷ ┸ ┹ ┺ ┻ ┼ ┽ ┾ ┿ ╀ ╁
+            // ╂ ╃ ╄ ╅ ╆ ╇ ╈ ╉ ╊ ╋ ╌ ╍ ╎ ╏ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛ ╜ ╝ ╞ ╟ ╠ ╡ ╢
+            // ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫ ╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ╴ ╵ ╶ ╷ ╸ ╹ ╺ ╻ ╼ ╽ ╾ ╿
+            0x2500...0x257F,
+
+            // "Block Elements" block
+            // ▀ ▁ ▂ ▃ ▄ ▅ ▆ ▇ █ ▉ ▊ ▋ ▌ ▍ ▎ ▏ ▐ ░ ▒ ▓ ▔ ▕ ▖ ▗ ▘ ▙ ▚ ▛ ▜ ▝ ▞ ▟
+            0x2580...0x259F,
+
+            // "Braille" block
+            0x2800...0x28FF,
+
+            // "Symbols for Legacy Computing" block
+            // (Block Mosaics / "Sextants")
+            // 🬀 🬁 🬂 🬃 🬄 🬅 🬆 🬇 🬈 🬉 🬊 🬋 🬌 🬍 🬎 🬏 🬐 🬑 🬒 🬓 🬔 🬕 🬖 🬗 🬘 🬙 🬚 🬛 🬜 🬝 🬞 🬟 🬠
+            // 🬡 🬢 🬣 🬤 🬥 🬦 🬧 🬨 🬩 🬪 🬫 🬬 🬭 🬮 🬯 🬰 🬱 🬲 🬳 🬴 🬵 🬶 🬷 🬸 🬹 🬺 🬻
+            // (Smooth Mosaics)
+            // 🬼 🬽 🬾 🬿 🭀 🭁 🭂 🭃 🭄 🭅 🭆
+            // 🭇 🭈 🭉 🭊 🭋 🭌 🭍 🭎 🭏 🭐 🭑
+            // 🭒 🭓 🭔 🭕 🭖 🭗 🭘 🭙 🭚 🭛 🭜
+            // 🭝 🭞 🭟 🭠 🭡 🭢 🭣 🭤 🭥 🭦 🭧
+            // 🭨 🭩 🭪 🭫 🭬 🭭 🭮 🭯
+            // (Block Elements)
+            // 🭰 🭱 🭲 🭳 🭴 🭵 🭶 🭷 🭸 🭹 🭺 🭻
+            // 🭼 🭽 🭾 🭿 🮀 🮁
+            // 🮂 🮃 🮄 🮅 🮆
+            // 🮇 🮈 🮉 🮊 🮋
+            // (Rectangular Shade Characters)
+            // 🮌 🮍 🮎 🮏 🮐 🮑 🮒
+            0x1FB00...0x1FB92,
+            // (Rectangular Shade Characters)
+            // 🮔
+            // (Fill Characters)
+            // 🮕 🮖 🮗
+            // (Diagonal Fill Characters)
+            // 🮘 🮙
+            // (Smooth Mosaics)
+            // 🮚 🮛
+            // (Triangular Shade Characters)
+            // 🮜 🮝 🮞 🮟
+            // (Character Cell Diagonals)
+            // 🮠 🮡 🮢 🮣 🮤 🮥 🮦 🮧 🮨 🮩 🮪 🮫 🮬 🮭 🮮
+            // (Light Solid Line With Stroke)
+            // 🮯
+            0x1FB94...0x1FBAF,
+            // (Negative Terminal Characters)
+            // 🮽 🮾 🮿
+            0x1FBBD...0x1FBBF,
+            // (Block Elements)
+            // 🯎 🯏
+            // (Character Cell Diagonals)
+            // 🯐 🯑 🯒 🯓 🯔 🯕 🯖 🯗 🯘 🯙 🯚 🯛 🯜 🯝 🯞 🯟
+            // (Geometric Shapes)
+            // 🯠 🯡 🯢 🯣 🯤 🯥 🯦 🯧 🯨 🯩 🯪 🯫 🯬 🯭 🯮 🯯
+            0x1FBCE...0x1FBEF,
             => .box,
 
             // Powerline fonts
diff --git a/src/font/sprite/Powerline.zig b/src/font/sprite/Powerline.zig
index f32fdb01b..ba56eb38a 100644
--- a/src/font/sprite/Powerline.zig
+++ b/src/font/sprite/Powerline.zig
@@ -11,7 +11,7 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 
 const font = @import("../main.zig");
-const Trapezoid = @import("canvas.zig").Trapezoid;
+const Quad = @import("canvas.zig").Quad;
 
 const log = std.log.scoped(.powerline_font);
 
@@ -176,10 +176,10 @@ fn draw_wedge_triangle(self: Powerline, canvas: *font.sprite.Canvas, cp: u32) !v
         else => unreachable,
     }
 
-    canvas.triangle(.{
-        .p1 = .{ .x = @as(i32, @intCast(p1_x)), .y = @as(i32, @intCast(p1_y)) },
-        .p2 = .{ .x = @as(i32, @intCast(p2_x)), .y = @as(i32, @intCast(p2_y)) },
-        .p3 = .{ .x = @as(i32, @intCast(p3_x)), .y = @as(i32, @intCast(p3_y)) },
+    try canvas.triangle(.{
+        .p0 = .{ .x = @floatFromInt(p1_x), .y = @floatFromInt(p1_y) },
+        .p1 = .{ .x = @floatFromInt(p2_x), .y = @floatFromInt(p2_y) },
+        .p2 = .{ .x = @floatFromInt(p3_x), .y = @floatFromInt(p3_y) },
     }, .on);
 }
 
@@ -391,8 +391,8 @@ fn draw_half_circle(self: Powerline, alloc: Allocator, canvas: *font.sprite.Canv
                 const average = @as(u8, @intCast(@min(total / (supersample * supersample), 0xFF)));
                 canvas.rect(
                     .{
-                        .x = @intCast(c),
-                        .y = @intCast(r),
+                        .x = @floatFromInt(c),
+                        .y = @floatFromInt(r),
                         .width = 1,
                         .height = 1,
                     },
@@ -404,110 +404,86 @@ fn draw_half_circle(self: Powerline, alloc: Allocator, canvas: *font.sprite.Canv
 }
 
 fn draw_trapezoid_top_bottom(self: Powerline, canvas: *font.sprite.Canvas, cp: u32) !void {
-    const t_top: Trapezoid = if (cp == 0xE0D4)
+    const t_top: Quad = if (cp == 0xE0D4)
         .{
-            .top = 0,
-            .bottom = @intCast(self.height / 2 - self.height / 20),
-            .left = .{
-                .p1 = .{
-                    .x = 0,
-                    .y = 0,
-                },
-                .p2 = .{
-                    .x = @intCast(self.width - self.width / 3),
-                    .y = @intCast(self.height / 2 - self.height / 20),
-                },
+            .p0 = .{
+                .x = 0,
+                .y = 0,
             },
-            .right = .{
-                .p1 = .{
-                    .x = @intCast(self.width),
-                    .y = 0,
-                },
-                .p2 = .{
-                    .x = @intCast(self.width),
-                    .y = @intCast(self.height / 2 - self.height / 20),
-                },
+            .p1 = .{
+                .x = @floatFromInt(self.width - self.width / 3),
+                .y = @floatFromInt(self.height / 2 - self.height / 20),
+            },
+            .p2 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(self.height / 2 - self.height / 20),
+            },
+            .p3 = .{
+                .x = @floatFromInt(self.width),
+                .y = 0,
             },
         }
     else
         .{
-            .top = 0,
-            .bottom = @intCast(self.height / 2 - self.height / 20),
-            .left = .{
-                .p1 = .{
-                    .x = 0,
-                    .y = 0,
-                },
-                .p2 = .{
-                    .x = 0,
-                    .y = @intCast(self.height / 2 - self.height / 20),
-                },
+            .p0 = .{
+                .x = 0,
+                .y = 0,
             },
-            .right = .{
-                .p1 = .{
-                    .x = @intCast(self.width),
-                    .y = 0,
-                },
-                .p2 = .{
-                    .x = @intCast(self.width / 3),
-                    .y = @intCast(self.height / 2 - self.height / 20),
-                },
+            .p1 = .{
+                .x = 0,
+                .y = @floatFromInt(self.height / 2 - self.height / 20),
+            },
+            .p2 = .{
+                .x = @floatFromInt(self.width / 3),
+                .y = @floatFromInt(self.height / 2 - self.height / 20),
+            },
+            .p3 = .{
+                .x = @floatFromInt(self.width),
+                .y = 0,
             },
         };
 
-    const t_bottom: Trapezoid = if (cp == 0xE0D4)
+    const t_bottom: Quad = if (cp == 0xE0D4)
         .{
-            .top = @intCast(self.height / 2 + self.height / 20),
-            .bottom = @intCast(self.height),
-            .left = .{
-                .p1 = .{
-                    .x = @intCast(self.width - self.width / 3),
-                    .y = @intCast(self.height / 2 + self.height / 20),
-                },
-                .p2 = .{
-                    .x = 0,
-                    .y = @intCast(self.height),
-                },
+            .p0 = .{
+                .x = @floatFromInt(self.width - self.width / 3),
+                .y = @floatFromInt(self.height / 2 + self.height / 20),
             },
-            .right = .{
-                .p1 = .{
-                    .x = @intCast(self.width),
-                    .y = @intCast(self.height / 2 + self.height / 20),
-                },
-                .p2 = .{
-                    .x = @intCast(self.width),
-                    .y = @intCast(self.height),
-                },
+            .p1 = .{
+                .x = 0,
+                .y = @floatFromInt(self.height),
+            },
+            .p2 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(self.height),
+            },
+            .p3 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(self.height / 2 + self.height / 20),
             },
         }
     else
         .{
-            .top = @intCast(self.height / 2 + self.height / 20),
-            .bottom = @intCast(self.height),
-            .left = .{
-                .p1 = .{
-                    .x = 0,
-                    .y = @intCast(self.height / 2 + self.height / 20),
-                },
-                .p2 = .{
-                    .x = 0,
-                    .y = @intCast(self.height),
-                },
+            .p0 = .{
+                .x = 0,
+                .y = @floatFromInt(self.height / 2 + self.height / 20),
             },
-            .right = .{
-                .p1 = .{
-                    .x = @intCast(self.width / 3),
-                    .y = @intCast(self.height / 2 + self.height / 20),
-                },
-                .p2 = .{
-                    .x = @intCast(self.width),
-                    .y = @intCast(self.height),
-                },
+            .p1 = .{
+                .x = 0,
+                .y = @floatFromInt(self.height),
+            },
+            .p2 = .{
+                .x = @floatFromInt(self.width),
+                .y = @floatFromInt(self.height),
+            },
+            .p3 = .{
+                .x = @floatFromInt(self.width / 3),
+                .y = @floatFromInt(self.height / 2 + self.height / 20),
             },
         };
 
-    canvas.trapezoid(t_top);
-    canvas.trapezoid(t_bottom);
+    try canvas.quad(t_top, .on);
+    try canvas.quad(t_bottom, .on);
 }
 
 test "all" {
diff --git a/src/font/sprite/canvas.zig b/src/font/sprite/canvas.zig
index 67e213b21..a3792fe23 100644
--- a/src/font/sprite/canvas.zig
+++ b/src/font/sprite/canvas.zig
@@ -3,340 +3,88 @@
 const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
-const js = @import("zig-js");
-const pixman = @import("pixman");
+const z2d = @import("z2d");
 const font = @import("../main.zig");
 
 pub const Point = struct {
-    x: i32,
-    y: i32,
+    x: f64,
+    y: f64,
 };
 
 pub const Line = struct {
+    p0: Point,
     p1: Point,
-    p2: Point,
 };
 
 pub const Box = struct {
-    x1: i32,
-    y1: i32,
-    x2: i32,
-    y2: i32,
+    p0: Point,
+    p1: Point,
 
     pub fn rect(self: Box) Rect {
-        const tl_x = @min(self.x1, self.x2);
-        const tl_y = @min(self.y1, self.y2);
-        const br_x = @max(self.x1, self.x2);
-        const br_y = @max(self.y1, self.y2);
+        const tl_x = @min(self.p0.x, self.p1.x);
+        const tl_y = @min(self.p0.y, self.p1.y);
+        const br_x = @max(self.p0.x, self.p1.x);
+        const br_y = @max(self.p0.y, self.p1.y);
         return .{
             .x = tl_x,
             .y = tl_y,
-            .width = @intCast(br_x - tl_x),
-            .height = @intCast(br_y - tl_y),
+            .width = br_x - tl_x,
+            .height = br_y - tl_y,
         };
     }
 };
 
 pub const Rect = struct {
-    x: i32,
-    y: i32,
-    width: u32,
-    height: u32,
+    x: f64,
+    y: f64,
+    width: f64,
+    height: f64,
 };
 
 pub const Triangle = struct {
+    p0: Point,
+    p1: Point,
+    p2: Point,
+};
+
+pub const Quad = struct {
+    p0: Point,
     p1: Point,
     p2: Point,
     p3: Point,
 };
 
-pub const Trapezoid = struct {
-    top: i32,
-    bottom: i32,
-    left: Line,
-    right: Line,
-};
-
 /// We only use alpha-channel so a pixel can only be "on" or "off".
 pub const Color = enum(u8) {
-    const CSS_BUF_MAX = 24;
-
     on = 255,
     off = 0,
     _,
-
-    fn pixmanColor(self: Color) pixman.Color {
-        // pixman uses u16 for color while our color value is u8 so we
-        // scale it up proportionally.
-        const max = @as(f32, @floatFromInt(std.math.maxInt(u8)));
-        const max_u16 = @as(f32, @floatFromInt(std.math.maxInt(u16)));
-        const unscaled = @as(f32, @floatFromInt(@intFromEnum(self)));
-        const scaled = @as(u16, @intFromFloat((unscaled * max_u16) / max));
-        return .{ .red = 0, .green = 0, .blue = 0, .alpha = scaled };
-    }
-
-    fn cssColor(self: Color, buf: []u8) ![]u8 {
-        return try std.fmt.bufPrint(buf, "rgba(0, 0, 0, {:.2})", .{
-            @as(f32, @floatFromInt(@intFromEnum(self))) / 255,
-        });
-    }
 };
 
-/// Composition operations that are supported.
-pub const CompositionOp = enum {
-    // Note: more can be added here as needed.
+pub const Canvas = struct {
+    /// The underlying z2d surface.
+    sfc: z2d.Surface,
 
-    source_out,
-
-    fn pixmanOp(self: CompositionOp) pixman.Op {
-        return switch (self) {
-            .source_out => .out,
-        };
-    }
-
-    fn jsOp(self: CompositionOp) js.String {
-        return switch (self) {
-            .source_out => js.string("source-out"),
-        };
-    }
-};
-
-pub const Canvas = switch (font.options.backend) {
-    .web_canvas => WebCanvasImpl,
-    else => PixmanImpl,
-};
-
-const WebCanvasImpl = struct {
-    /// The canvas element that is our final image.
-    canvas: js.Object,
-
-    /// Store the dimensions for easy access later.
-    width: u32,
-    height: u32,
-
-    pub fn init(alloc: Allocator, width: u32, height: u32) !WebCanvasImpl {
-        _ = alloc;
-
-        // Create our canvas that we're going to continue to reuse.
-        const doc = try js.global.get(js.Object, "document");
-        defer doc.deinit();
-        const canvas = try doc.call(js.Object, "createElement", .{js.string("canvas")});
-        errdefer canvas.deinit();
-
-        // Set our dimensions.
-        try canvas.set("width", width);
-        try canvas.set("height", height);
-
-        return WebCanvasImpl{
-            .canvas = canvas,
-            .width = width,
-            .height = height,
-        };
-    }
-
-    pub fn deinit(self: *WebCanvasImpl, alloc: Allocator) void {
-        _ = alloc;
-        self.canvas.deinit();
-        self.* = undefined;
-    }
-
-    pub fn pixel(self: *WebCanvasImpl, x: u32, y: u32, color: Color) void {
-        const ctx = self.context(color) catch return;
-        defer ctx.deinit();
-        ctx.call(void, "fillRect", .{ x, y, 1, 1 }) catch return;
-    }
-
-    pub fn rect(self: *WebCanvasImpl, v: Rect, color: Color) void {
-        const ctx = self.context(color) catch return;
-        defer ctx.deinit();
-        ctx.call(void, "fillRect", .{
-            @as(u32, @intCast(v.x)),
-            @as(u32, @intCast(v.y)),
-            v.width,
-            v.height,
-        }) catch return;
-    }
-
-    pub fn trapezoid(self: *WebCanvasImpl, t: Trapezoid) void {
-        const ctx = self.context(.on) catch return;
-        defer ctx.deinit();
-
-        ctx.call(void, "beginPath", .{}) catch return;
-        ctx.call(void, "moveTo", .{ t.left.p1.x, t.left.p1.y }) catch return;
-        ctx.call(void, "lineTo", .{ t.right.p1.x, t.right.p1.y }) catch return;
-        ctx.call(void, "lineTo", .{ t.right.p2.x, t.right.p2.y }) catch return;
-        ctx.call(void, "lineTo", .{ t.left.p2.x, t.left.p2.y }) catch return;
-        ctx.call(void, "fill", .{}) catch return;
-    }
-
-    pub fn triangle(self: *WebCanvasImpl, t: Triangle, color: Color) void {
-        const ctx = self.context(color) catch return;
-        defer ctx.deinit();
-
-        ctx.call(void, "beginPath", .{}) catch return;
-        ctx.call(void, "moveTo", .{ t.p1.x, t.p1.y }) catch return;
-        ctx.call(void, "lineTo", .{ t.p2.x, t.p2.y }) catch return;
-        ctx.call(void, "lineTo", .{ t.p3.x, t.p3.y }) catch return;
-        ctx.call(void, "fill", .{}) catch return;
-    }
-
-    pub fn composite(
-        self: *WebCanvasImpl,
-        op: CompositionOp,
-        src: *const WebCanvasImpl,
-        dest: Rect,
-    ) void {
-        const ctx = self.context(Color.on) catch return;
-        defer ctx.deinit();
-
-        // Set our compositing operation
-        ctx.set("globalCompositeOperation", op.jsOp()) catch return;
-
-        // Composite
-        ctx.call(void, "drawImage", .{
-            src.canvas,
-            dest.x,
-            dest.y,
-            dest.width,
-            dest.height,
-        }) catch return;
-    }
-
-    fn context(self: WebCanvasImpl, fill: ?Color) !js.Object {
-        const ctx = try self.canvas.call(js.Object, "getContext", .{js.string("2d")});
-        errdefer ctx.deinit();
-
-        // Reset our composite operation
-        try ctx.set("globalCompositeOperation", js.string("source-over"));
-
-        // Set our fill color
-        if (fill) |c| {
-            var buf: [Color.CSS_BUF_MAX]u8 = undefined;
-            const color = try c.cssColor(&buf);
-            try ctx.set("fillStyle", js.string(color));
-        }
-
-        return ctx;
-    }
-
-    pub fn writeAtlas(self: *WebCanvasImpl, alloc: Allocator, atlas: *font.Atlas) !font.Atlas.Region {
-        assert(atlas.format == .grayscale);
-
-        // Reload our context since we resized the canvas
-        const ctx = try self.context(null);
-        defer ctx.deinit();
-
-        // Set our width/height. Set to vars in case we just query the canvas later.
-        const width = self.width;
-        const height = self.height;
-
-        // Read the image data and get it into a []u8 on our side
-        const bitmap: []u8 = bitmap: {
-            // Read the raw bitmap data and get the "data" value which is a
-            // Uint8ClampedArray.
-            const data = try ctx.call(js.Object, "getImageData", .{ 0, 0, width, height });
-            defer data.deinit();
-            const src_array = try data.get(js.Object, "data");
-            defer src_array.deinit();
-
-            // Allocate our local memory to copy the data to.
-            const len = try src_array.get(u32, "length");
-            const bitmap = try alloc.alloc(u8, @intCast(len));
-            errdefer alloc.free(bitmap);
-
-            // Create our target Uint8Array that we can use to copy from src.
-            const mem_array = mem_array: {
-                // Get our runtime memory
-                const mem = try js.runtime.get(js.Object, "memory");
-                defer mem.deinit();
-                const buf = try mem.get(js.Object, "buffer");
-                defer buf.deinit();
-
-                // Construct our array to peer into our memory
-                const Uint8Array = try js.global.get(js.Object, "Uint8Array");
-                defer Uint8Array.deinit();
-                const mem_array = try Uint8Array.new(.{ buf, bitmap.ptr });
-                errdefer mem_array.deinit();
-
-                break :mem_array mem_array;
-            };
-            defer mem_array.deinit();
-
-            // Copy
-            try mem_array.call(void, "set", .{src_array});
-
-            break :bitmap bitmap;
-        };
-        errdefer alloc.free(bitmap);
-
-        // Convert the format of the bitmap to A8 since the raw canvas data
-        // is in RGBA.
-        // NOTE(mitchellh): do we need a 1px buffer to avoid artifacts?
-        const bitmap_a8: []u8 = a8: {
-            assert(@mod(bitmap.len, 4) == 0);
-            assert(bitmap.len == width * height * 4);
-            var bitmap_a8 = try alloc.alloc(u8, bitmap.len / 4);
-            errdefer alloc.free(bitmap_a8);
-            var i: usize = 0;
-            while (i < bitmap_a8.len) : (i += 1) {
-                bitmap_a8[i] = bitmap[(i * 4) + 3];
-            }
-
-            break :a8 bitmap_a8;
-        };
-        defer alloc.free(bitmap_a8);
-
-        // Write the glyph information into the atlas
-        const region = try atlas.reserve(alloc, width, height);
-        if (region.width > 0 and region.height > 0) {
-            assert(region.width == width);
-            assert(region.height == height);
-            atlas.set(region, bitmap_a8);
-        }
-
-        return region;
-    }
-};
-
-const PixmanImpl = struct {
-    /// The underlying image.
-    image: *pixman.Image,
-
-    /// The raw data buffer.
-    data: []u32,
+    alloc: Allocator,
 
     pub fn init(alloc: Allocator, width: u32, height: u32) !Canvas {
-        // Determine the config for our image buffer. The images we draw
-        // for boxes are always 8bpp
-        const format: pixman.FormatCode = .a8;
-        const stride = format.strideForWidth(width);
-        const len = @as(usize, @intCast(stride * @as(c_int, @intCast(height))));
-
-        // Allocate our buffer. pixman uses []u32 so we divide our length
-        // by 4 since u32 / u8 = 4.
-        const data = try alloc.alloc(u32, len / 4);
-        errdefer alloc.free(data);
-        @memset(data, 0);
-
-        // Create the image we'll draw to
-        const img = try pixman.Image.createBitsNoClear(
-            format,
+        // Create the surface we'll be using.
+        const sfc = try z2d.Surface.initPixel(
+            .{ .alpha8 = .{ .a = 0 } },
+            alloc,
             @intCast(width),
             @intCast(height),
-            data.ptr,
-            stride,
         );
-        errdefer _ = img.unref();
 
-        return Canvas{
-            .image = img,
-            .data = data,
+        return .{
+            .sfc = sfc,
+            .alloc = alloc,
         };
     }
 
     pub fn deinit(self: *Canvas, alloc: Allocator) void {
-        alloc.free(self.data);
-        _ = self.image.unref();
+        _ = alloc;
+        self.sfc.deinit();
         self.* = undefined;
     }
 
@@ -344,8 +92,8 @@ const PixmanImpl = struct {
     pub fn writeAtlas(self: *Canvas, alloc: Allocator, atlas: *font.Atlas) !font.Atlas.Region {
         assert(atlas.format == .grayscale);
 
-        const width = @as(u32, @intCast(self.image.getWidth()));
-        const height = @as(u32, @intCast(self.image.getHeight()));
+        const width = @as(u32, @intCast(self.sfc.getWidth()));
+        const height = @as(u32, @intCast(self.sfc.getHeight()));
 
         // Allocate our texture atlas region
         const region = region: {
@@ -372,31 +120,7 @@ const PixmanImpl = struct {
         };
 
         if (region.width > 0 and region.height > 0) {
-            const depth = atlas.format.depth();
-
-            // Convert our []u32 to []u8 since we use 8bpp formats
-            const stride = self.image.getStride();
-            const data = @as([*]u8, @ptrCast(self.data.ptr))[0 .. self.data.len * 4];
-
-            // We can avoid a buffer copy if our atlas width and bitmap
-            // width match and the bitmap pitch is just the width (meaning
-            // the data is tightly packed).
-            const needs_copy = !(width * depth == stride);
-
-            // If we need to copy the data, we copy it into a temporary buffer.
-            const buffer = if (needs_copy) buffer: {
-                const temp = try alloc.alloc(u8, width * height * depth);
-                var dst_ptr = temp;
-                var src_ptr = data.ptr;
-                var i: usize = 0;
-                while (i < height) : (i += 1) {
-                    @memcpy(dst_ptr[0 .. width * depth], src_ptr[0 .. width * depth]);
-                    dst_ptr = dst_ptr[width * depth ..];
-                    src_ptr += @as(usize, @intCast(stride));
-                }
-                break :buffer temp;
-            } else data[0..(width * height * depth)];
-            defer if (buffer.ptr != data.ptr) alloc.free(buffer);
+            const buffer: []u8 = @ptrCast(self.sfc.image_surface_alpha8.buf);
 
             // Write the glyph information into the atlas
             assert(region.width == width);
@@ -409,102 +133,105 @@ const PixmanImpl = struct {
 
     /// Draw and fill a single pixel
     pub fn pixel(self: *Canvas, x: u32, y: u32, color: Color) void {
-        if (comptime std.debug.runtime_safety) {
-            assert(x < self.image.getWidth());
-            assert(y < self.image.getHeight());
-        }
-
-        const boxes = &[_]pixman.Box32{
-            .{
-                .x1 = @intCast(x),
-                .y1 = @intCast(y),
-                .x2 = @intCast(x + 1),
-                .y2 = @intCast(y + 1),
-            },
+        self.sfc.putPixel(
+            @intCast(x),
+            @intCast(y),
+            .{ .alpha8 = .{ .a = @intFromEnum(color) } },
+        ) catch {
+            // If we try to set out of range this will fail.
+            // We just silently ignore that.
         };
-
-        self.image.fillBoxes(.src, color.pixmanColor(), boxes) catch {};
     }
 
     /// Draw and fill a rectangle. This is the main primitive for drawing
     /// lines as well (which are just generally skinny rectangles...)
     pub fn rect(self: *Canvas, v: Rect, color: Color) void {
-        const boxes = &[_]pixman.Box32{
-            .{
-                .x1 = @intCast(v.x),
-                .y1 = @intCast(v.y),
-                .x2 = @intCast(v.x + @as(i32, @intCast(v.width))),
-                .y2 = @intCast(v.y + @as(i32, @intCast(v.height))),
+        // Clamp negative edges to zero before converting. @intFromFloat is
+        // illegal behavior when the integer part does not fit in `usize`,
+        // which is the case for any coordinate at or below -1.
+        const x0: usize = @intFromFloat(@max(v.x, 0.0));
+        const x1: usize = @intFromFloat(@max(v.x + v.width, 0.0));
+        const y0: usize = @intFromFloat(@max(v.y, 0.0));
+        const y1: usize = @intFromFloat(@max(v.y + v.height, 0.0));
+
+        // `pixel` silently drops writes that land outside the surface.
+        for (y0..y1) |y| {
+            for (x0..x1) |x| {
+                self.pixel(@intCast(x), @intCast(y), color);
+            }
+        }
+    }
+
+    /// Draw and fill a quad outlined p0 -> p1 -> p2 -> p3 and closed back to p0.
+    pub fn quad(self: *Canvas, q: Quad, color: Color) !void {
+        var ctx: z2d.Context = .{
+            .surface = self.sfc,
+            .pattern = .{
+                .opaque_pattern = .{
+                    .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } },
+                },
             },
         };
 
-        if (comptime std.debug.runtime_safety) {
-            assert(boxes[0].x1 >= 0);
-            assert(boxes[0].y1 >= 0);
-            assert(boxes[0].x2 <= @as(i32, @intCast(self.image.getWidth())));
-            assert(boxes[0].y2 <= @as(i32, @intCast(self.image.getHeight())));
-        }
+        var path = z2d.Path.init(self.alloc);
+        defer path.deinit();
 
-        self.image.fillBoxes(.src, color.pixmanColor(), boxes) catch {};
-    }
+        try path.moveTo(q.p0.x, q.p0.y);
+        try path.lineTo(q.p1.x, q.p1.y);
+        try path.lineTo(q.p2.x, q.p2.y);
+        try path.lineTo(q.p3.x, q.p3.y);
+        try path.close();
 
-    /// Draw and fill a trapezoid.
-    pub fn trapezoid(self: *Canvas, t: Trapezoid) void {
-        self.image.rasterizeTrapezoid(.{
-            .top = pixman.Fixed.init(t.top),
-            .bottom = pixman.Fixed.init(t.bottom),
-            .left = .{
-                .p1 = .{
-                    .x = pixman.Fixed.init(t.left.p1.x),
-                    .y = pixman.Fixed.init(t.left.p1.y),
-                },
-                .p2 = .{
-                    .x = pixman.Fixed.init(t.left.p2.x),
-                    .y = pixman.Fixed.init(t.left.p2.y),
-                },
-            },
-            .right = .{
-                .p1 = .{
-                    .x = pixman.Fixed.init(t.right.p1.x),
-                    .y = pixman.Fixed.init(t.right.p1.y),
-                },
-                .p2 = .{
-                    .x = pixman.Fixed.init(t.right.p2.x),
-                    .y = pixman.Fixed.init(t.right.p2.y),
-                },
-            },
-        }, 0, 0);
+        try ctx.fill(self.alloc, path);
     }
 
     /// Draw and fill a triangle.
-    pub fn triangle(self: *Canvas, t: Triangle, color: Color) void {
-        const tris = &[_]pixman.Triangle{
-            .{
-                .p1 = .{ .x = pixman.Fixed.init(t.p1.x), .y = pixman.Fixed.init(t.p1.y) },
-                .p2 = .{ .x = pixman.Fixed.init(t.p2.x), .y = pixman.Fixed.init(t.p2.y) },
-                .p3 = .{ .x = pixman.Fixed.init(t.p3.x), .y = pixman.Fixed.init(t.p3.y) },
+    pub fn triangle(self: *Canvas, t: Triangle, color: Color) !void {
+        var ctx: z2d.Context = .{
+            .surface = self.sfc,
+            .pattern = .{
+                .opaque_pattern = .{
+                    .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } },
+                },
             },
         };
 
-        const src = pixman.Image.createSolidFill(color.pixmanColor()) catch return;
-        defer _ = src.unref();
-        self.image.compositeTriangles(.over, src, .a8, 0, 0, 0, 0, tris);
+        var path = z2d.Path.init(self.alloc);
+        defer path.deinit();
+
+        try path.moveTo(t.p0.x, t.p0.y);
+        try path.lineTo(t.p1.x, t.p1.y);
+        try path.lineTo(t.p2.x, t.p2.y);
+        try path.close();
+
+        try ctx.fill(self.alloc, path);
     }
 
-    /// Composite one image on another.
-    pub fn composite(self: *Canvas, op: CompositionOp, src: *const Canvas, dest: Rect) void {
-        self.image.composite(
-            op.pixmanOp(),
-            src.image,
-            null,
-            0,
-            0,
-            0,
-            0,
-            @intCast(dest.x),
-            @intCast(dest.y),
-            @intCast(dest.width),
-            @intCast(dest.height),
-        );
+    /// Stroke a line from p0 to p1 with the given thickness and round caps.
+    pub fn line(self: *Canvas, l: Line, thickness: f64, color: Color) !void {
+        var ctx: z2d.Context = .{
+            .surface = self.sfc,
+            .pattern = .{
+                .opaque_pattern = .{
+                    .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } },
+                },
+            },
+            .line_width = thickness,
+            .line_cap_mode = .round,
+        };
+
+        var path = z2d.Path.init(self.alloc);
+        defer path.deinit();
+
+        try path.moveTo(l.p0.x, l.p0.y);
+        try path.lineTo(l.p1.x, l.p1.y);
+
+        try ctx.stroke(self.alloc, path);
+    }
+
+    pub fn invert(self: *Canvas) void {
+        for (std.mem.sliceAsBytes(self.sfc.image_surface_alpha8.buf)) |*v| {
+            v.* = 255 - v.*;
+        }
     }
 };
diff --git a/src/font/sprite/testdata/Box.ppm b/src/font/sprite/testdata/Box.ppm
index 676b07ebea775e35eb23bf1d5095343bb53d5300..c21952561269dd20ed23a27cc122b77191d3d959 100644
GIT binary patch
literal 1048593
zcmeFa+i@(nwk*o;{Udq6>49`k-2L73K=Od}K+A;f!CD@W9!MTEJ&@u!!0CbXbt29e
zFP@1UKvosHQQb|>i3&wmB1et~27rE%oci^@|LcGK{J;OdFaF`b{`Y_X_kZ2ZYj$9E
zV0K`3V0K`3V0K`$1MlyR<lS8j*-Hr54;8O>9((IVU+x5_dZjq!t2pJWIOVIjUSFkC
z+(?cQUsa3Zx^wKkU#=Cea}Rcx^U>pVPh@AI=u7Y3>fM*Oinn*(z2|$C`gYzfR|_Zh
zrR;xiK_5L{?>zR_iN5T;FK-oZ@4RyV4fO9S_3gc1t`$%8RrbHPo{t`{cOHA|L|^va
zm$!<ycV4;w2KslE`u5&0*NP|lD*NAC&qt5fJCD6}qAz>z%Ui|UJFnb-1O2;7eS7bh
zYsC|NmHqFnXYY8uc|W>N^kwgTd8>GP=au_!pnq4X@1yU(rTRo)W&fA_Yku!|b^Omd
z7Jb=!U*0O--g)Kz8|dFv>idTFrR;z2{KPs9#p})c(RHFPd+*Cz#oIft+%E(DyGnf@
zeg7@hC;BS;zvMspz7B{!@4YW?6>smna{mqV?<)26?ShY|5`C5Zzg6WQy}sVQh<WV2
zFK-oZ@4RyV4fO9S_4V$9kERoSmHod}-5<Ta-oA)=?7c5<6>smna{mqV?<)26?t_n}
z6MdEazg68Iy}sVQh<WV2FK-oZ@4RyV4fO9S_4V$9kERoSmHqFn`=iI}oyXof(U-mV
z<*nlFomcL^f&N{kzP<O$wc?4s%KrD(^U>q=&SP(#=*!;w@>cQo&MWudK>w~%-`@M>
zTJc0*W&eBY**jiu-jA*mec5|o-YVYSdFB2a=-*Z9`{?^`sXozH+5aW~n%_HK9sjeA
zMPK&bm$!<ycV4;w2KslE`abf$^bNIFsaEvWON&6EDBwq^gllKK5YD@c5D0$~@QLHK
z>Xt?Bu60GA?U^mQHjs*4^f`i?dn+t53d^!0K%5LryrQE(Ny+ipIr?Nq)~6QC<V6Y;
zVB#fql`}aIMomLZL{dbScqL#jhpGt_Um_D^TfrH%de3C5yVruoL_~tdA6yR9hOE}u
zdBhU%7y@Dx?KzxTr$qXku>0sKKm^{RxZ=@;e3Ez-d1femp-M@HD}&WIp-D*#Veu`&
zv$5DR!|MXB$Vv#ql>vOH7R4<DD$;;D;}+R^*xhs`P$_twyHa7PoKj0hysp>fd`UTD
zxoa+Wlg>t=m~??+SUiS+7)5*bb@>4^@TFv&d;{azXi38~X7zTw6xk<<mq}G~S>*1}
z8LkXqr=BWqAyAP9)ET$P7Qz-mNwCf>1S;;k)|bMTlGf92>WZKgyv_|KJbwbIEPdWe
z%^B^lD;=^ZXDs((r*n`=FUwfpqktcw6Rw@{LOAbkLYP6!BI602I=9N#bLqk~OcB{C
za(EM*imq@l3Q+XISg0af2wMcjNui=^1JN|#TJ)eyUlQwOMG(;vmqJ3G#zw9<leY*E
z2cRt_Yy;R6*i2Y*l({J`2j;v*m#yW_wt_Q)&J$*k6yOrbjx4V=t7A%&bv|asH@}Ng
zWPL(s_Dq_H@>QJjRh;rw+|B#ARX}I|hI$}!s9q^f<?ZEEJjJPaN~bvGJHn|Pz1-GP
z8++%P7@unndg~K9)&K4u?j7%%Jv%2?=Y03>>E7${8S5LVS@rLv<rnKd9P^|CHpIR4
z3EfCvm9OGduN1d2%iEhf`#01Bkwf)LaVl>wr{XD2#Zx-PDc=!J<>=*3+Dx5ma#GA&
zh{Zfq|GRs*cf4El<dmG9^WD3rd#}f5tZ$@d)xT4gy*GDvOtFuKYwSrb^7hV6*0*=O
zo;lo7(>dS0d#ZY`IF+}TQ}Gn1;whctl<x?qa`bX1Z6mSnDo=OJz2o&J7I{_w)!Y=P
zd=;mB71ukBo9R0HH`D`>L-k5=DsL~R;wetWQ#!>d-w{sb=;cn@L!E2V9dqw^y@^F$
z)&K4u?j5gp9=FnU&Uf#g?!6wLvA&U-RsT-fLSo&AV%{pA$UD+k<*PW=E5)6(CbtlG
z_HU>MB8Tdg;#A&VPQ_E4il=mnQ@$gd%F)Z6w0%0)q&w!V;)%Sf|J^;@J6?AKZzSxT
z@7_J#dp$m5eIqri{++ag#JUf~yj47rccib%S8=LWiaTjdZXxdM-%t-k4%I8gsl2_M
zil;agPw5n=d`CEyqnA5r`*f~Jcg$PG6M0qtyL-5IyzU0xNZ2{wy?eU%dVI$EMrv05
zJ81`rbsvg(t9T;sNMDt&;#98`chZ{NLfqNEp&p1Fs#l6rd3!k(PjM=q(kV{)j&LeR
zFL%=R>0FbOV%|b5=AruE-NU`(-J&O_<m{a9-aXxWJw9W7BQ>l3owDq`xlf9D3vuWE
zRsB+&>Wku(ui|del9O_F_HU>MB8Tdg;#A&VPQ_E4il=mnQ@$gd%F)Y>uDi-}Qp{V3
z#oki=S94RG@>QJjRopFFa#GID{tfj&<WRj*oXXqFsd$Q0@sv(+%6EiQIeNL#bys;#
zig^pM*juXqYHo^CzKT=6in~QiPRiNYzo8z89I97}Q+azi6;E+0p3*5!`HpZZM=y8M
zx=-=!!0f>6!0f>6!0f=c-2w6b>sIek)O(qcT9j{Z4yEhmR9?lYc)dF1tGMpXEA~NW
zJnt#IyEE@AY%5XqDNfBo=@fT6-_h>*+0@qnr}oSUCwJD)^{E`!s9ELMy_R}ym&T%}
zvcA^edkXJXcon9tMAfG_H4CLv-0gfvo9bs%Ujv-lGb5bbSv%LKa$KWkm1Fl>>a|@O
zi=N8*T7T~;yj$T_n6?sCpW@UklumKC^BrxfpG|!YaB9ztaB^quT%XEujha=C-D|1W
zc4;hnD(h?gy{GVQg;!zPN>qJ{Q?pPy#of+#w5fhJ^)<k$Ju|||owaj)D#tZyRylUB
zrC!^mvFNF+ul4ty!n+k-g=s5M^(juxLg^HDJKxc!`q|Xi0H^lM2q$;e&h@Dr*Qi<L
z*u9o|ZI{NPr?S4*-+K!0R(KVrtwhzQI5i8UQ{3%*N1N(rQ(ps|+A|}Z+*v!<r*d4Q
zW|d?2TI#i38jGIF`dWYQDZE?ZRhYICRiEP2ER;@hxAPrss-I1L4RC7DjBs*i?OdPA
zagCZ)j@@gi*LG<vdMfK{{k^B~ZiQE2+DcS?ic_;tI>p`2ceJT~HuW{YsXa5o$(^-x
zeJaN_YF0URuccnwrLpL#tgrR=p2E8oUWI8ZQS~WK%|huEcRSzFrux~`*8r#X%m^oU
z*3R{*9M`B><=DNJdTp1+qNlRH*57*y?^bvfrmaNPr#LkWrBmGPd`Fw=XH#DToZ2%Z
zoZMME*Qat^qh^(3_gd<;T^fs?%KBP=?<u@n;Z>Nn5>=n#)GU-vakujwZK|J5eGPDG
z&x~+#XYE{{%5jaFRgT?jsn>RCEP5*IYyG{a@NR`yVcJSmeTq}FP&&ok&Udt_em3<r
zz^Oem!pWVrbA2kuHELElcCV#g+oiGSsjRQ{_nyML6<&pDD^c|+PR&B;6n8t{(Wd&@
z)Ykx~_RI(;ch=7JsT|j+S>@QhmU?ZM#-gXPzSiG+3h!2U6{f93)u%W$3#C)s?R-a@
z>St451Dx73Bb?k>JJ+XjT%%@{WA|F>wcUTL&%^$C`XaV_L)E9a+nwQ&n)|x4tZH+s
zK6G<xPmgfwJdALv78OtFdN~zOaVlP~PWdWsdErmA_~7=+0hZ2ER%e~MXN|x6J(e$}
zEnHUldtoCgwro-LakjO*);Gsiry<))Mb<Z0SF#mUe00q@Hs{S;xo6+@UNjjM4d>2Q
z=CYD)1!t61VF}G3DZnL=9o?P#km(}lTC$b7tYllk8D*8NFS8<^cg$wmmxW8loFK@}
z0Nw!@6Acp7Js_QpX5?moc(p?u2eXF&TGz5vOO?Ynikg*Ni@uFjn4*eN^0+7z$|`@a
z?ZuWYsy@!f{mVZ2Zhdp?zWG|;7;(S6xt4OzHMjSo$%yAXx!eI>ozKqYCoQaW*cM?{
z9s#tw<y1!P^Ys(rJ>peRX8fJmvJT8<b;~KH#8~#bL|*j%jQQFwtk+$vWu_5DOU?Qo
zP43(7L0(|YGHivg>K*~a9pZ_MaQ7MvmO<Q2wY_x>Dbqt%w<>R8R(WM_tBfrBW~Ri*
zo~_DTm@PicF8jAC&*Uh}eoJ|6HS7Mxn)Bs*wB%8_XAgT{v$L(>jBK1d%2s#JRK6P%
z4HC>U+l8Vl_Ket8a7I~W<0aOMiXbAx<tPifWG<_Hf=>A=PSvM$;A)35tuWW1V@=zF
zuz4X#f$5e_ai8Z=z87I(+U`f+sOA1;pM1ADa_lzVT8tda-*W_i=MigKY`v_#*fMWx
zjx1+e%WHjeZ0}K`XExu9g>{*AcQ2&j=YXo@ojKJP#ZmW5??f%iS8*zy(v5JX&<(UT
z4_}16azO9ddKc}QF8W4o+@Kj}uXwjPa_qjmHd2n4V{irSEUa(wX_dv6eQeD+Z<%F5
zDjLr%$5vdnl^7~-$nK@NxKDFeCGX6sz9>%Ya$XU%RQW1S#Z$Tw4n=JN(NmRIgz=uO
zw`OW~e_v7HRj7Jqu_raV|9tBVI&Ww}=XzeHUEN9A`KjGD#Hqe0PV91-SNSSV#Z$Tw
zZlupDuZVZ6J+1SK0<S`~`&8AZIJJkAPI0}yp#`1md6jl`C-Jkcubny77saXbsyOAV
zxDj6!PjPkY7RREeo$*HY<^H~l6TGmY?Vp;P;(E{Uh;NvD=XzeaQ749P`>Ogi#Hqe0
zPMuf9DPP5n_^Nn{>+ZJbb7#DfeYwAH_k?#Aw*6CcQ=HnnN~gG9-!R$E_1w8JgW>$F
z>uYCD^+j>&yedxlDsIGA#Zz3}y2Y{RX=l8VeYwBy;sh^jX#1z;rnufSJmMQB-?^R_
zZq$k4+rFxP4RNY3ic{xRamrV5Bfcu0;<~#n`rH|BWMA&@+dbi(g>C=T+!UwwuF@&4
z*EdYIb3J!%%wRY_>-yT6Q+-jKI<JaTzKR?1Rq+&8w{CGPdfFLpWMA&@yEwrM8`}P<
zxhbyq43GGR$#<^jg&TEZ__nXAUqhVgi{jLIRh;rw+=#D=r?~EJi#~V88`+op`*u%w
zXJOkvH8;hny{mMJ>-7zj?Oe~D8#5Tr&$_;L=2Tx4r_QV5l&|7Od{sQf)va3`i=KAI
z8`+op`z}uK!iKhgYHo__J;NiuVe*~pdErK#7{2YR>empb`l2{>UKOW&6*uCm;wi4X
z+oI2%@kaLL{=VH4-dWi8Pt8qnYVRtY;(C3<WINY$=f(_%^Ruq6ojKJP#i{eEIOVIj
z5nmNgadqn!$D*g5@kaLL{=SP7ys)9|pPHNEde88PZ<u`NdS19uCx&nPs`@p=slF&q
zoma&vU&W31s(6Y!vD@d!NA~6ZzURzf51)3Qs<|mn?Oml)T(57K;f?mkSa<!b>uYCD
z^+j>&yedxlDsIGA#Zz3}x(kd)_T~P*7nopI$aYVvxhbyq43GGRDcop(bhU3$)b>^N
zYlu^QQJgxjic`Ld8}U`~6t~#d{R|`fa)00b=5<=6cAu)bDNgNOrBht5Z<x!C_Qz>`
ztkr(j^|dpn`l2{>UKOW&6*uCm;wi3f-37)Y`*MHZ3rw&pWV<KT+!WV)hDUt6rZ7db
z1G59O1G59O1G59)Ne6EAzNNRlZGGzfQi=BHJ}<mKmT^Wk=XGnB%DblDu<icdyt6)&
z*AMKqQT3J0wl+WVYi%!IQp*?)RYL~`I${!M7|&G%4=M0NbKTwjPjlVfy*3wWK{S<9
zaa;;x1Y&XnxA8qwvx;j~k?Hf@C=2E~nl~seJG}Yo-}0#3YtHM|uC*L%ehW6gGz+Hl
z=+1B^FZKt(dATp;g|YzrTHDL#ZW+U&Jm|ncM@-@j<GG69Aq8%Z|HC~0+^X2g)oxmO
zHBz36OKy+;oWIpC_Rd&wL06?R!<8IH*E#Szrc>G{>kMb|V%q}r<+_#%WdY#45>s=r
zItZvS;AjGgBPPW&Ox(2<RfZIJp8wL8*fm;!VDjUdW1n@bJQbIsgk!D_cg0mt2W<^2
zIa={H<9E!q>I6k+IFlFK7T|=0SCFDCfOIx7j<cqrgRwB;P$@Vth(}D~470IgDeyf1
z+&I4)c||g>xNO_L*U`L(;$oz9%GK#MT=i@q%2>`J#M_kLG25yW6rJHrUTj-{6B6z$
zMF{}u9AfN2%lR^lWS)Pl4Eh3Q8?^f<cHMd^PMt%gQ=E;yHuB!}rMRd`hg=<R!<i?8
zI(!%}=aBL(GH&KA)d}{_`b=Kz4}cRA?kq(K0QI@{hL-bXz$JiK7;&f+92oc#lQ_e8
zt|E9yfk$5dIph*vt)p_HvMS5+u#F$PZaoyIdZcuUv(eW^-n+gO7c@BI>U<LiT^+fM
z7jqQJDabF)Df&%qXE>7=`vYJz&BX*L0RU$cjGrS70`vu0E|y3fY6b@ezQiQXFrKRj
z9#Y_W{%vF0&c#(^)hSr5gW^I=9C3BPiGyyrlEoa$E^8LQW7g~^+d9LUyx6t?C#T)b
z0VM$7Y=ZG~q(Okb0L#Uah(nFwz`&Q7#2LnO6~RLaJkP&vOxwA*imW;Xt94MEkGXNe
zaq3C8Sjtk4MTd2R-!bco6Rn-$OkQkTfD@DMC`SnZIGbSn9BB}sFTrxLDB@5hI56-f
zCUJ)GTt)DZ0?+er8`E|!t|F^W!D<~8=VRi4o2LR~!?7nFj5S!wQR_O#e#dl5`(&Nr
zOkQkTfWBO#Dx?FzIRxY9LxTic&evjfu{z>VE;umoB_?r(@mxjlkOI&1ZyVEgF0L%A
zPQhv&6z5{%eEaz7j3>_MmTFnZu_WK({f^nvoMi6|XYyj(0-TicY%$US;2eVS^Pxe4
zE$54{x>y!*s1qC*_!5&i!+5SDcu0Zg`L~T}I~P}$RVUz|>fhV*zHje8sSxJKt3#eR
zqf>!Ax4QmIPRqRCG0WC7ES=#@UTj-{GZLTSf^-1s%wg<7ALr8>x71eRP$4)l@Fga3
zhVfiQ@Q?z}^KTo|b}p_gt4_ebydPWj@%@*Uv@icFZvfOYLmO^&{ag7K<9E!K<|KP(
zIFlFK7T~0mXN!>z0Ot@)+$A&&u+{k@tSXj89O?uI2EN23&M=;<2p&@4dH!u<+Rnw5
zWz`Az`Te(6{oDI<OV;oB)iDoT*USI<`itxvl;1HMlGDtc;Y?m^TY%GY_NWmW0A~(N
z+z~Vkuutb}u%59h;!q|yFz_WNafb0+MevXU&+~5^({?VdHmmya{on8Rt@{4`@0M-O
zyE^BNYjkaV&#bP$c3ETnj#;yxZ0ihX@?zTpoSb&k0iglX`NG(P^EqFD36<JP9BKpy
z2EN23&M=;<2p&@4&v5=xE&%!frp!04PM&TS6pw<k?G)b}x9fiY=lkzU`1|{x_ch-f
zcXiMm*XZg%aa`u4?I*InRVTWv%R9rFyk)&ITF-HgHAgOYy<dtwqduLlDSO0wtdcmC
z4Gs)^iAkJcJXaAsq`>q1`)1_kc*LXFD0CgVtY5qT^vm<#?|=TFBtQK5{`d1QKe0)i
zc6HL<-qjb194O&33t?QK;%xn6BUmJ(Pop<P)YuT5L)lXR<5YARnKZi$a^!L^tG@^5
zi!5<Gie(drd4K~0Ut$tx7|&G%4=M0xIR9Mw8pgB$*3`B~Z~<8N_162}j&Hut?o1wk
zySMrn=68Ag@9uthj`rds4!d|ta9<MA-v-AR{;|Z#NORdEtCo6D><P&vIU40Mx?;}=
zarf3C@F>WYmb>p!eVxzw`fRjVF>#mzI56-fCUJ)GTt)DZ0?+dw8-tbraKjY-;rA%b
zYw~Q)x_VCFDEYY~aq6ts-w1l?<M(|2xqo|LEHyx0OArq1%lW!FdOoMbVQS#Oz?YcB
z8OC!J!9xmsjq^|GxQd8ZjaGu$FRV!$ym%nZs&8EF94GsfDB0rf<~{t%dwp_Qf#T)A
z`h}qA{N|olR>GWa;(e0m%iLN-q4!kdvZWJt#1P?N8wDkAMrEZl<MS7s(fOROIY){2
zI1A!1EpTAqOHASn<GG69AqAf2-!~&SC$4rG#YUm)&^bl@9p2J+eL2|bUB3K_-vMws
z`Ao4IeAQUyzAEcAax>R_dUfmjcr8PREv_y<mX$8-zML<s#P%o_N*pQ&2L`^xB+f9N
zs|X%a;LmXWW9jy6@^sF{ZI4tA#ktS^&Cl@k&-(gmTR7Cs(}L4ATwjJIRojClWf$*v
zR*$e(x4w^iGOcKE3;=Mxz{J(3VSwlBd^t8+ERZ-<3=Rx@iAkJcJXaAsq`>q1+s3q=
zi#vc-9*f`Mk-zIx`^oX*nXaA}g0A9Zz<zRdpY(^xO24x*Uc2||*7xyRhRTL54}h}-
zCayFM1Kd~V%d#0_p~RtbaA4p|OyUgVxr*Q+1)k^MHm2=d+!d_ySo{vJ{9WJLPmULl
zboIm#bQLE9_LHmoq(4kn`kj^W+Pzn|zK_>3R5omR0GuT-aiwV(;J!Lvmdy|gB@UH?
z0|Q@T5@#6CRRj+y@I3#vF>UAKu3(kN;%|85-}=;kYP@oy%`-#N8BWDJg<IVxrv!>x
zzq5M0dUfmjxF=K6V9NvGEP;tDO~U~9)%kL4hFBnRsbbQB0Y^;Y4CA?q;31OT-OclF
z8`E|!?h00Utop0C+E0yF4zzh{NIJu*c&BjZ*1V9z@2mxrp?h_*&n8mm09zg)og0i}
zNoE)Z`|5nTHb<NQahMS}Fz_WNafb0+MevXU&+~5^({|2xvh8NeC>6(E#H;6-o*SSe
zZZT-DLCvGrSLpoC5_CiL>O|+8SX~6RJb+)X`no*li?vx}`NUy9;K0C_n8X>za}~iu
z3OvuhZA{xa-^sR{Eu&N%dl9c5XB>L^X&`ZnL3<5q9=*Oo=XaK%8>&|)I^V=<2yAZv
zzc%%C8_pMND~aV3hxvd517BhiXBf{_1P>|jJpZ;aZRdO^+itduQgQ4hym6ZG>_9qj
zOELR-HIIJpM(KA}Lkgl-Cps^pRTQ>2fM1*Xx((+`wUxx;iNkEbfq^eEi8GAnDuRa;
zc%FaTn6`7ilWjL!MyWXV65cq>IQFCiw-mFVSM%uiZj^p!HKZVVb)xekT18=d1NgP6
zuiJ3GR9i_bo;b_~92oc#lQ_e8t|E9yf#><RjcGgQJK1)#Wt56zFW}9yoX)+`(R0B<
z*j`%Aqt{pH{LT_|L-p!J=bKm!f$a_8*QUO1!}(%uC9!<sFduMW;7d&64CA?q;2{N`
z=ifG_?VRsq+s&3yDvrH?H;-~U_(o@*3>E_S(P|!jo>Jv^mNZmkuTFH{M#5GG@as@t
zABXeR+v?(ch{FWIfq^eEi8GAnDuRa;c%FaTn6`7ilWjL!MyWWq#vLa)ocu~>o(**5
zK32`6&r_=W&XR_T?A3|R+ep~T0Dc|n>*H{~dRtwb4{?|vI56-fCUJ)GTt)DZ0?+er
z8`E~qce3qf%P1Ac*0|#!hofKVs;2`TyN6Zt=<!?7_?@+2GIXy_bl$|N&Qc8PAMNGW
zroL{&`RZ*YaX!Rhg5bcwmzcyE#&Z?HLkc|4zimw0Ip4{)n=PYM99!d_Kj(e(6}(x0
z&+T!kc?`t$JFCaNS0_4e!(%H0_;skSkHh)uZFO-z#9@Nqz`&Q7#2LnO6~RLaJkP&v
zOxro%$+nv<qf{JQWBTWOw7{{CzJf<<_S_zqn#Vv~zq5MWdv&7oHaxa6fM19D`Z%1g
z-c}dqLmVau4h(#WNt|IkR}nm<z@O**^8_ZI9he=M9he=M9he>X);mCNS1{F8j*Nf+
z2!S_K=Dkv{j>1ehJ1{$NxeoLmN$3FRh!clOM>qtTV0K`3;F29U`IwReAOKZ5!cmy`
zEMtg;fls(#GW0Gw3IYf~>4*aaxS&plCbosDtX8$tdQdVz0Ln@nAixE6IyA8@RAsfQ
zkJdk5b-1Y1O!0972XL*|f1i+wTZC8DQ8GXPDv>xqfD7t$XkuHa%4$^~ZU3eUsdzvD
zngwxy0Hh<hVTSUmI!XozfRfB&!3^Maeh{w8X^~5Ee<lF|dV_4}$ZV!y_<G=rs5v#>
zMMpsZ0VpeRfB+ZN>CnWsP?goHc3KZg1_(e|i30?<piYM-wuP#!R`t>P=c^7Em6|C&
zPT&CM`HwC04G9RKYGoS23~)i64oz$eRavd-qwU{RAr%h@K(inY5P)<9H_T98RY%DH
z0Z@`zESLej&JV&>IW2NYe!bU!Dr~~pf!TpwJ3z-7aaTDq0s<h!8BZ+aJK^lW?7&qz
zaPrY52S5NSd4!`d6V49I4qUDSCm&OC00f{qM>q;I;q1Wdz~wq{@-Za`Kme+9grhJM
z&JN5DT&@ErA5(Gw1fV)cI0~bo4_;doS8JBW%2#nq{xz?B6<7N$jg_zB;IE9t0Rqrh
z#l(HL@i1`>dRwHuTlRMGeCBXbQJLZ+1`dF};v)`WMmPjn0zP<cQ(UcC8Y^GLE&12H
z@>N{zw=`D1ii5v05(fw{&p&$C0jO<ID-OmDpnMg#5e8=Et2jEt@ZI2im~fFUQ+y^z
zII2ZMAH23GuGTD#m9OHK{A*tMDz5fh8Y^GL!Cx7P0|c1oAHC}U)HbLU2jd1%zKYuj
z1GDl~9GzkKZg4(KxJZ{NK9eIH)uN#fURxAbYnH~!S8+@JHLrXXSNkoEm9OI9uZ+Y2
z0?hM|-gN+K8`O$}aRVq{#chOvS@|lC&M<s8I3Fflq{|ea$q|lfnQ(SscHnXyIQf{8
z10Vp^Il@tx31<gp2QJrvlaDDm00K~*BOHa9aCTsJ;Bp<nF$JJp#1SMxf~!XQoIsv%
z0U+aqvjejOSLpzb9{{B#jvxsVTs6+~-!^PAvjZQa130<>R2OjsNs!>G@s{V`7gm=$
z!qu@x$0M~U-_d$%f0bi|tK*N3M`}^Ns7D#k;NSufPeaIa2Df*8bT)l8;X)2F8c2c!
zvuktKqFtH60kmPq86UikGQ!odM#m$yDBsa~YJZhugsbC^jz?-yzNkkT&*0z!FwcMN
zv2TdI(EA;M&rpXO;p$kU<B?jF?`S=>zsfPf)$vEiBef`B)T4}NaBu;b=Rfw?H^g4(
z{f@wAsKbqLb*$0xNG-~Dw4T~u<rv}W_@m>IT9hy9QN}YkxB$%aAA9T@VlVW5g4u!D
zfh%>uo<TpN#4vu=&;K-+31<gp2bvDxhy##>ID#ZdaMh@vJKzG)hMmmp!0f<fJAmT{
zK>3IxNP+}cjr07s4V%pDz{ltSjxGSzMI1pAB)Dpv=f7>(WM&6GMh9?o0jMtG2$CSd
zRpUJWZNnxrJMb|&fTIgQbrDC91PQJh=lO3NHksLhkI?}fT>z?!ID#ZdaMd`^f7`Ih
z%np2v4&dklP+i0kBte3!#(DnRhD~O6;A3<EM;CzVB90&l5?nRT^WQdXGP468qXRg)
z08|%o1WAzKs&StGwqcW*9rzd>z|jSux`-o4f&^EM^Zd6Bo6PLM?7-~6?7-~6?7-~6
z?7-~6?7-~6?7-~6?7-~6?7-~6)jQz7#Y;&60jL_{00BrxaKjAcRdtjM5CA2`uP@UO
zW`GOobZBB*sLE<pW=hHoZZ#em0Ra%gB!d~?f;t_V*cPg?T9uiSGJ{)<M@B#ZgfPiq
z1{l+k_x^b(xGI;-fLGN~GC%;7w0~bH2Ot3DCAeXR@~S#Y1_*$XX8))a22!s80Z2(4
zAi$W8y!T%x3a-i}GvHNqlnf96CGFoA$^i&Kc?oWqp}eY&k^ussq}e}eg@M#7Kmbw_
z2M92xBk%p!iGr(g$qaZ^9VG(<KuP=eg>nD_P+o!?W+<<!qhx>pC~5YOT45md3J`#l
z!~p_~>BxJ+*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C@3;dSUpcm&
z*>HdvfW9gw4uLl~m;pX{JWN~znkEE_gK+~WU&U>Nfm!(~4h|bY`6>=!l#w_<fKR?(
zXdbEuFato<1(ciO5KI|~0|fZ~`j4ptpnID@xhQTkP!cF##eJH1s(9i60lvTe*SnTp
zM{R<e;#Q6100=-ii2H8iVd5I}wn%%o>^B#0V?r<ke1H3|-37}GwFx4{ts2Pz5a9dk
zKc)_V?rj3)qPWdKNuYcc_i5s(;)w$U`2O}^?^;gX?7-~6?7-~6?7-~6?7-~6?7-~6
z?7-~6?7-~6?7-~6?7-~6=j;H!{shq9gTf4;b?5@=MmU54(0O47(7KUYlrPc&=)5ok
zXdQCw1SlOMj&KOF6BwyQ`R-H}!l)c090KnIMr(mTfZmrd185zlMY<6VVE}Yqm;tnI
zq!#6ibO1Up%m7-496JF@hlnE_g6srFYEiyBm4z@W#|VeOJAu(!;18hpCCmU?hiQ>+
zghLnrofl>RtsALD`63;F&I>bu)*;7EfYKr22!|j$fstC2?@nbQjLI>>A@ELMv=;aS
z=zR$@fYxDJq#NN720-V989?hsYEiyO2cYx944`$$u@j(lh&aL_$WCCS7UjEBSqP(Y
zjBp6N6ZlBA>@=krW_Dn9V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0Pe}
z?7+zPxzm?^rb4@8_C@!FS~TGY>A>8pdausD?BACUvgKzP)PcFr_uuEUr@E)K?I!#n
z9hiGn@739t{rmDkw)`w(c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3
zc3^g3c3^g3c3^g3c3^g3rw-ufssK9hh8a#*)lo7)0F-2Ya0)ZP1$8<!u`N_(wJI|u
zWd^qzkBoo-2w{@J3~)i64oz$eRavddOi7u+t;QoGAOJ#`WH19<P^Uu^OQorLNV#E#
z@~S#Y1_*$XXp#i4q0=Q0+d@@VtD<=*BXO(o$Os6agTM@ML7fgwYztLct!nl!_XPa{
z$QFCwFhhJ*9VG(<KuKn?VFtLMPKPG8g{rJpWu~Oe;8x?25fA_&Ofr}OE~wL?iEW`O
zt5umPDKog$cw__wKnRlzW`GOobZBC!G*u5NH_T98RY%DH0Z<Z6lHfITx&&fdsLE<p
zG!JDYZZ#em0ReOnm;o-R)1irNp(?9Y&HkyLOm22yc3^g3c3^g3c3^g3c3^g3c3^g3
zc3^g3c3^g3c3^g3cHrCU0KP8;=%1f*QCx3C<*T?)6HgUS93TLFFHIcgyTQQ>@X6z0
z;u_F2Ay6EQ8$kIgZX*oL%2#o4*Z|5`aR{S~6^9`Jyf=YzQQT&rBv8JJ`!w-X@x%cF
zeDd>3^S~Mr2MDk+Uj)&B!a$%n7&n0ORoq4xn3b>M;IILdui_9!87mG$0C;Z#<)XOF
zKuMr{758c4sp5$P1lTwaq(cvg0|fZw@i0jZXqpfx4#o|jd=<A524>}}I5=zo<*PV^
zQO1hH5CGnrK)EPxGf)yJU&Vczc&d2f00BPvd8K(^jfevT*qASZXh2~gP#laKK=~?e
zBMi*SS8;II0LoW!2&0UOgTn-~1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O
z1G59)QU_MQ4phc7IJf}l?|oqg&^q*=bR!(X0O-6h18ChyEy@?^0CZlM0klr#7~zl(
zK<9-SK<iM;PJq%O;s}QzJAsi}l<z5Jt@<nD85~>y^t!_gpmmrZ=|(t&0nmA22GF{Z
zT9hx+0qDFi18AMfF~T7ofX)jufYzawodBgn#1Rfbb^;@{DBn}cTJ=}PGdQ>a=yit~
zK<h9+(v5Hk1EBN544`!*wJ2Yt1JHS42GBZ{V}wIG0G$_R0IfqUI{`|Eh$9?=>;y(?
zQNE{?wd${oXK-)<(CZE}fYxDtq#NN720-V989?hsYEiyO2cYx944`!?#|VdX06H(s
z09uDyb^??R5l1)#*$Ir)qI`EMYlfK}m>rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlM
zm>rlMm>u}!9Z=sNPL95pruChDxlUiu^4ai<H+;7E;w=u_dG@)#&-47F1N}`qbAz4f
zHurQ|qS?Rww<vc1Z0`Qet$JDw=eeHe`sT;)<^_CjhXM_sV0K`3V0K`3V0K`3V0K`3
zV0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3;9Ki}`thfl<1RW10ti4^
zi30?<piYM-wuP#!R;7Qn<76Er0|cOG!~p_aP^Uu^+d@@VtD5~&J@H&`|3>|L^Q`K~
z2nawG5(fxyL7fgwYztLct*U>WR^yQo5P-502MBOMoeoWG3sqUIYW7d{#B;s<8};wa
zv#KK_AOKZJ93a32bviV$EmUQ-s{VCajYmd60Ln@nAixE6IyA8@RAsfQ*+112&-M0i
z)W0{+s*a3+08}AyfB+ZN>CnWsP?goH`qybS9vJ}vC@XP*02kEh(8RV-mDQ?d|5Q&r
z*W14(|H;n|%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%np1T9l&>m0QCJK
zahUA}2Q$DYkB5nCK+}XkaWHNG<*T@jFfc1$#lc|%C||`Pj4~1j2=K}G3(Z6I0A_%V
z`67r06b1st!B_!k0XFI(9YPTY2=K|{VUilqG$BwNj2l4tDsCeT%*t1BaM%FKS8)iV
zjKl!~eDeK5^H4p28DL|+2%-Unfk1IERsdRnje1CjP{aWOeDZjhqy{ui2owk722j3=
z+Xw@*@>LuhHh}U~9Kt9gaex4ye812<R1aVV*qASZXh2~gP#laEfEHk*9?~Hcaex4y
zJRT;g0ZkJE#lg4%l&|78!oaM26$gh6pnMgFFv>_AAiyWzFEkI;1DF9e=8GU2P#6dl
z2jc{@1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O162pqcZwri6@GX)
zQj79MLCSar2NwYSy)VoFS~u&{eL>p*wC>aN4~32aYR6fDl>Z2KRuL;sBef`Bq*BH+
zIJf}leF-yw*3J5KU(hxHt@||nL!o1U+HqDO<v+rmRm6(ZNG-}2sg&^y4lV$CU&0Ka
zb+bO*7qksP>po5YQ0N$-cAOPR`HygC6|v$pQj79MDrG!_gA0J(moNip-K<ad1#JV+
zx=+(T6gmc|9b<u}_w2yz!0f>6!0f>6!0f>6!0f>6!0f>6!0f>6!0f>6!0f>6z~}3L
z`hswB^yM)v>Fmqwi{B0x+y@8^K<RekXh&0UVkr;U$<1E*%`o@DMSFnu0#Lf0IGpLZ
zp0O3wSM+9bwDHiD$zAx&VA}yu%Xi{vH7Pi;ln3nOX0I;1_vc=tEk@@J5MWG)K2I<^
zFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgq|iFgx%~
zbbx-8xH_Lz9T@=uC@gV+02kEh(8RV-mDQ@~=da30+-f{B0s`nDFaumrr$ZClLRD6)
zn*CEf@!Ylgw@V%h0ti4eBn}YZf;t_V*cPg?TGdYLM9BaFC@XP*02kEh(8RV-mDQ?d
z|5Q&rcdh>Il81r-0?-VJ0|dCBPKPG8g{rJpwbMFLGC%;zN*o}-1$8<!u`N_(wW`@a
z)f3NMtAD%Xp&)<&G(+M50WPT1p^0swDyvoPv`&-^5P-502MBOMoeoWG3sqUIYW7d{
z#B*2e-%=5ipB<PTm>rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlM_~tsGzI#+$
z@5Gg_;w~0XB_j?HfWChv4)twtFavz@c$l~bG))K;2jd1%zKYuj1GDl~92_=)@>Lwd
zC?j!z09W2GC^P~p0~W=>xB--};x@v-tb7&M>i}PXEBBxBs+wU2xN?0cN&_kj0>#0&
z0hF)eHp0NHd=&?W4WN7#hcL=W93a4L*O$szodS#-;O6l*3V<2l%KfLjXeDug09URL
zMQK1~L7+GoH-Pe0+(sCfm9OI9umO~>;t)m|i30?<?fOy~t5bk+1Kd2`MgcGbT)F?0
z7p)`?5a7!7p(qWgEC>__;|5T^irWYSv+`9O95#URRUE=7BXNKLA8UOvj|pZ6W(Q^m
zW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Pi12k0wD)ZNR$VJFa=w^z4QSqRgc
zw^xV2JAvN3y*iZD>#(7N89;x}4Ksk&&H8j-&^7?A^Zg^q$?b&0PGF=K<-1c^2%~b0
za0t8;7^y}1qAX=RgM$kI?Y}d)S>Jg7sO=}W6An9pky@1RPGupC$}z$r@J?W)7Uhew
zl<^D>E&${EH`GY+MruYFfN#G(n(E1|gTqc>q!#77Q&|Y3a*S{Yyb~CyMfsvEWjup}
z3&6L3zCsUaz9T(F7=WGnhoUB!9he=M9he=M9he=M9he=M9he=M9he=M9he=M9he=M
z9he>XmO6m15&_1(Q$!em8PD$*v;$yrvoG<P1X}{&I(uXG|F-?dasymv-H{Gp=KUVe
z!uL35Y<YmW_vb$NK3@msx__VRj;B6=`5gE?o`tyk=i}_a?7-~6?7-~6?7-~6?7-~6
z?7-~6?7-~6?7-~6?7-~6?7-~6Ejr-8pbnQxo^9{(+iMIL9Y{ChnP%AD`H^Z6ad4n8
z#E}wk)l5b}0E94L)l5b}022m-RWlg@0T9A~RWlg@0ZbSSlo=cV<OXb*jrJoAR_i7s
zAb_65s+o*{05hKH0QChBz$h_THIoq#03i%mHIoq#z=XkI)l5b}0E94L)l5b}022lS
zWd;WTxd9tyul17T0|-FDhyw)JTjv9Zivj~?+kE|Zoj_L%pcWZE49Exw(8>)%IzRxU
z#9-A-MnC|BFksb8MnC`)27^^I836$h!hlsX836%I7z~sd90247Y?!^)OOg*D00kos
z5MXbe4;(HE447^6{JTz|D+W-D3?Bw$1O#a1h9MmwfKg(wY9=Ef074kBY9=EffC+=a
zs+o*{00?2gs+o*{0459u$_x$wasxKZUh5^v2M~aQ5eEpcx6TI+7X=2)wt4<tC(soG
zs6~bk12O^vv~t6c4iLa7F<3Q|5fA_&3|KXj5fH$H!C=))MnC|BFksb8MnC`)1_NaV
z2LQPN8)mQdlH>yjK*5Lu1lU{W1BZ(O17_Pi|E?40iUHIj!-oMG0RdXMVMqrEV3ZiF
zn#l+VfDi_(n#l+VV8URqY9=Ef074kBY9=EffC+<vGJ^wv+<*<U(SD@CYTaZ61kkfs
zHIoq#V8$~YpuPYC7$pX)W-<Z-AcO&{W-<Z-m@pWun#l+VfDi_(n#l+VV8URa%-{eZ
zH(<kTv>$1(S~nR10rV_Z&13`wnDI;ps4sv3Mv1|ynT&t{2w}jgnT&t{CJY9vW-<Z-
zAcO&{W-<Z-m@pV9GdKXq4cIUn?ME7{)=fr006mLUGZ_H^W<1jY>I)!%QDU%aCL<sK
zLKv`WCL<t#34_6^nT&t{2w}jgnT&t{CJYA33=RNt12)V?`;i8#b(0YgK+j^;Oh!O}
z8P9Zp`T_`Glo+g<$p{F55C*K8$p{Ew!eFp!CL<sKLKv`WCL<t#34?($g9CuvfDN<J
zex$)_-DCs=(6d-IlMxVL#xotDz5oIkB?hZzG6Di1gaNB&G6DjaFc_?w$p{F55C*K8
z$p{Ew!eF4x-~b>uV8iUSUSfPK2QvV<5Qh*O9LxYJo;QXIVF_g0{Qlo{g8D*`T10Gu
zWCR3g<%S_0Ab?R~uxchFAOJ!buxchFAb<&j!K#^zfB*<#z^a*yfB+^82FeT$0CEF1
z%wFpy#>a9n1CR@G2(iJz46qRofqQ`u94>Om6wkDmLzrHN4IRt?u>%+e(9fa1odD{f
zc)c84dL1@&Faz}W1ug)nZa1Jpk8mjLj396U(7i&A2)w$Y_&EV)fEYEn5Qh*O9LxY4
z@esHd_`u;JhfMKIdpU&Zb=c6s44~HuW&o{2eLDe4hlsu0$qpNVVFu7WfEhsRP~T2~
z(jnpqHxdMS0lIqN)kvEFaex3ZYETk~5E~rK02}cTxEJ`q;Ub4j@l1O;gz0tI(7_C#
z*9m3-twVh~0ZNC6z1+zT8-Za4&^>?|K<iN7PJq%O;s`eq1bG3j(*xA}K>$r36^(Ga
z1fc=|0rWb-44`$WZzn+M5V4m#*<m9v%mBIvFau~E>e~rWIz$}dMuH$Oz;$|nnm-7j
z>7$|%ZkHfb03d)~Czt`W4)yH>C><j9awj`%1cn(v_W))9twVh~0ZNC6Biu+3<OR4+
z4^Z<50W^J7G{Wr?gbDxz(CY*<fYzbDodBgn#9r=XhmF861Lz*W44`$WZzn+M5OIVX
z34*);*XaRj{vd#+kBUaPU4l>nfB<@(U<S}S)VC9$bcoo?o$Rm?7-j(71DFA{4)yH>
zC><h>a3evG7vMTQK+PWn(DYH!2)9cRDgY2buM^AwT8H{}0+bFBd%2SxHUh&8pnCu_
zfYzbDodBgn#1U>J2=W4S^}wr<Hi0uZxB$fLA>tWaT~hzpCkq$3WQu3n%OOm!!-fuK
z0KHBy185!U+X+xQMC|2GcGw6EGl1>^%m7-4`gQ`84iQJVks!zm(A5L4M%o0<;NSuf
zvxkUhaCJ%jW1lQs<dP|#X)lK`y$%~Xm;v-U!3>~vsBb4g=@7A(JK13)Fw6kD2QUL@
z9qQW&P&z~$;YNZWFF;ohyc%f}ID>-=K+GN@p25{6^^bkBaFI)<c&5D^!t^?9=wJrW
z>jX1^)}g+g0Hs63UhZUvjleJi=pMifpmnHkCqU^CafBNQg1i7-J@9IzP2da;E&wrm
zh<FB9m()M@$-+f0nc|uDatPDwu%Uw)K(7<b09uFob^??R5qr6l9X0~P44``eGl15i
zzMTN2L&OnoBna{XT&D-9d4ijCfR-EiCr5i9T_G?VM9rA+Gk0KaWqsCFU$D-~_`Pvx
z#sr_a1K-=W9hooIc_hfU;)`ZX@R>XCt!&3@%meFujbtA^2bwX#XYRm9-)0}FKCJUc
zD(WV+_@tp50P$1QN8zwi6Kr(gCfmo=^cnM_m7lTrU2FEiLN@^B`N!_~RAAR_;b#6V
z$HT&Y%aiPyn-_&{0L=4`-SMeF*K)fVO&u-kci#*gdw_ZVX@7ho_}y>i6Q0|!tg~ro
z#RQ+b19L0)zqi3k&!(Xj6MXIt%&k1nzxfsZ-VO1&xBaX>b)a{-r;fJFYz8%B!r6h@
zf!TrCf!TrCf$ypV^gaj>KzCx*Oh!O}Z#$lz-TCbOitE!#Pk;QQIR3n%yL&3SFKfCY
zyBu<pMF~{bA3wG!q5HYm$OR;HUqDF!9RSsG2KN;{8I*X>R6(}p``^D5-JdJEcNZl4
zyshcpi!STU_XOS-27<&<c)(RN836$h!hlsX836%I7z|d;WCR322m@Blt40&%<iX>z
z$It&P=l%5`x9<7+&$90B=YO*5?(S(tS5y~7Zn7xhpK`F@|M|5^3f+H-iCjQJ_XJ7;
z=m4meGq@M{WPo^o{y-IEYr6mLZJGORS@-UPWZm<M&Y$(HD|d465C($8Rq8NYHIoq#
zKt-dx%1pk10LtNZ@pLukv%I!8%8!4c1#XSa_2VDTRG5|L2^Xa3UJ$TwS4D2JDB=CD
zA9HTUHE#i+`zNcMZ*c($-3urQpabAqGTF~PWf$DD@83Uh0Wf>uU;N#bId{ZyWiH)A
zkjy<oReZB;o_}gihG>rNSC;XeT$qk{)!52UVcr(S<+Soy_j32*?=Cq0it-AVjV?%r
zcXwYQEZkL*n=DFre}BojJy*X4D7y0AfLyX`iT4$h1SMWZrj|tZb6;{8+_Uea$q%yw
z;l<xs6Zbm{S?6#FlDT^lX0F^3uTbyg;2|soiL2CMxN0UNAb^TSd6k)b0Rfc5?c(Wb
z&S&3waXHKr8sGpM!izt1{>?zTSm&`10`5yz5eMTnz`d)b)bRfPl#)iyz6B_{KTw+E
zvI}!b`URW<C0<6hmdN&V^kkX>Cf{e9J!iv<zcVK;z8OgJqVp&QN!%Mpv^qaRJTkFI
zN!X>W>#0J8Au@I3{>I{|c8qZGnZAucAO6D{LlIy6iTe?&(VXiL1fKrMAYI_D3LdMy
znm^B{qRd->T&cVB=Y;Hnx{K}!oB~Cck>0!6TJBnR_fixQMV<NZ?+Sq`zxadu+ms2N
z2PsJ6;*&vA8(kPs4j4^n5)fR{)_JN>VMxA!0Lp>#Dl_>40w{;u#naWC&!(sPIskw8
zcb$MazxWgPFLa7@!662LFYQ=xR|U`2`Olx{F#&oD0Pg+$U%5!n)n5S=-Ak6qd(mY?
zdni+NeSz0H`Z6_n2c7-!?<z^g^vOS#7P)_$I+HHka*)J5vXFHVBL|t-!(mtm5?86i
zaMeskKmZku@+vd=0s<(9+r`t>oX@_$Nnbjvz7bi27k}dZLZ^x@>SK`X%kNmhT@}1F
z=j$cpFB2$l0Sfp2S1vMe(Ir!5YrlU5rvSxEkoFLa{oLIbIB`64R(&1ONH6~2ellfp
zv>YUH_bg;x#5o6B*aHcjphKHn5v>XphGYZ;P!5z=naLLrKsnqlp04J6_FaAf@Pye%
z5aZK}zoS#mpIK_^NI^2k8#Z%Sz2A%d_v9<!8RWMB%>A6oj9hR@jPd;fQUHz@vGGmj
zevZFQE%Bb4%-&{R{GB4`|FqH=BRnEV<lc;sbP4L3wNr;SIU<h<)Ec1!P!5z=naLLr
zKsnqlp04J6HhtA=1K1D$t`ot`FaDXkyZa9sRk$F}L2~HybP(j3yXyT>od587o@bDM
z5Ww7jQkjvnUjdNrX>B&7OOW;ujQ!kQ^96wOtv~#`f?R0(;vc#9_up)_D1m}R4o?Tc
zq3ANwHJj*J>h=*JfKg(wY9=Ef074kBY9=EffC+=as+o*{00?2gs(ICD!kj$F-}SO%
z_2J)TcUJr2A35*z4~i?C=MlsY{PjY@$XxmHS)Bjyd7cXMFuVm|?w?d>;>=e7q<dML
z59t!rXQ5ZezfBc=?(XvEcCMp&@ps0c|AAFh3k8XBAC0iTK5~N-<spugfU9OQ0s<g}
z0jp**0s@#Y7_6Gf2nc`>2CSM_jV8>=gLm--0H4&DI{(Zcg*XsX{mNA0f(1cxNnc5s
zxvSpa)cN<n1we%b=)VOpoE!Tqb1z{OA2R3eUx{uY=@KmQimji!YrX)uyR#=Xme9QT
zJKE>_y{Zs3P>{&o8zpoAx@PYL34rEIf*l5w4$(%qU4l>nfB?ELFaw~HP4JUY(9*){
zLfZv)7Ni5voDcE`G#(%<eLbDHFDxv&jDDy$WH$8NyC!=d>f<UR)^jtuKVlK!o=Q>R
z9Qcyxr8CD``CR1s_m>s9k$vYB%^}fOD=jr`A@BD~CCag$i4WWx5e{ZuzVqJ`WG?T6
zGR~;=&sv*hY?QOP*5MlAb_qfS00QW~zzl#&Ho?Dzg5KJ}>OwzakO0yF==z-eM@Rxd
zr2OIEv59+PC3G45@^{u{=9Igb>`#B!0UkpwTt&osZdUhW2nLS60s!a0Q%bq`o}0yP
zzGfH(cX$74Nct!RTS)YRVe(DsH*ZefbSK3!-yj6;*@jEHd<#!?c~I`rr;IbwbG8}A
zM>!jBsSyqpjRYZIKmgqrm;q49CTJGedC*ro@OPoB6+Wq!YP;sJ`0($-C(i8FtV{PF
zelNPp)HQ4;`@^3(F9JM=9PXjIdglAD5T!VluK)^HemmH4&07H0MXd8yPpD4nQq!|(
z_I0dzMn#`7-(}!p{JXZsxb_tyaF13Qb?LGCdxBz%K4qMdp0mv`KFZm63yg56Xe0>v
z0s`p1zzl#&HbL_S&v}TW?zr{~0Hg!Z^|_FLK@vbNiw}F^zOpjw;@!RYyUTK?jDBf-
zpWAqEA%}aYE<HCZ`YV(}+)J*HID7%%Ir%d;7Q33wFbrJOKWf%zdaB5f{fi|7*Y)81
zeyW8r)}x^VcW)?DbAFPub@);AQM}@8hVc<{P^b|OVMc<GFCc*43orwql1<Qj!Q(t!
z7`wocYrX(LIsjdtdtrhif>f5-u935wHR-}Fy!bnJ=1#-EH@=T;yn9DEoqx1f7x%NT
zi2a^YiOl6M03w%9M-&=e7}$8x?zF$w%HaQE*%<SyAtTrG=zhVsk%dstmiFA6rKktx
z8P1du2GL3JimOr#50Qf+jc^Dv5`=sK0rXyg832`Rg7^0y8jTBM-{3ig5B*pvkq$uD
zSN-tcsbe7fb~SQOh~)t#j4%F{BXbA*!Nl_pKlwKlmFJ_Q9OobH)y4hCM`OSI6##Sj
z3xLFx15)`$yN#E*>IbTh4)fp-RGBkBQElcM-1I~9!~ay@pVgM}H9c1$JQpr{ddpO~
zN2fB*Xlc$>lEGol##>;7LpdWs$QKYm_XTDERI&+nS!2oz2;fWj2crl=S*PsP{NnH2
zH>b2D_^tMJAO72T&yn+w_Uh{WKR&tfOJ4yP?j@E8ZsUwV2jKnxgHZ%A=1I?~(Pi;*
zhu4P9=Qf|bnur@}Zn4la4aiIWBOJnv1R-BQ0Nodu0Z_>%*ylZ7POR%2f3J!cf0t-;
zN=pKF+O6q1lx?}YH&5z$1i5)=b-%iBMw0&d0K<KaISW^gN#Qc8<9Xiu@9~)`*0J<B
z{@IY8*j$o4qjHa#%Q&ND?NY7h@HrcAixCbrj07QHKmgqrm;q49CU7phe8&@0>-t8&
ztD+Zwmu7QH3xc0&-^W}tJKP6S&m-#jvDN+Ig0aHkCm-K(Up!^Fd{V5=kE|}*gZp1r
z8#wpyLUiK35_LlVM!y>_&uXy_t|H=Peo32Vw95@SDB1{zFe5?87Z5=21(*R)$tG|v
z|E@(bjQt8AaF14rbO5@(A7RhH#j$Txogu`2%8Gao9KZNyoh_tm^zX_yeE6?8_q%b9
zSvqsUWFEwPe{8t+ZwJyB047vAEQSk%CZ1!%g#Tr=xe5HSDslXjaKicjJBOtE2|pXo
zPigSB_4-jXDPD01v=cxMiZ{X`%t#RO1q9H00cHSHvI(3^v&<>XpFfay;GT(a5a|Fk
zS8Y_MKH+y&?2CW!wN><v{<gmH!@uQRsAtCvo&SHeJ@8)vgs^yc2}NTVK{ziJ=Z3Yf
zR|NO!X*F>Efu30tXP#A|+XT%EgY&Sj3NeH$=&=sg{9w}weC76nzonPwT&EctBPm50
z;Sgpd2>Aj6=pMohfJ!!jclo7K<uK302kwms2eYo+HTPE$BxK*EMl!_yYNdB~)r-F~
z+Zwt@zxc?ce)eO35$E7r;=;{5=(!n*AN(bBFE#fyB&~h@TY#GL^(oE=_`?`N$nfw4
zK5<__I8bz3=qDj~6r2lsU(?NtzY}m35m#Oi5KkN+fQ|_>!1t$9>$J2czf-$CMB6Ax
z<m@s-2SC1OaO!VW5C)(s)(@%9Y;FEd;CCM&Tl4#T%Q@@4XO_-fwDIHF^Znv0;Ybnd
z=fz+Dsg>Tl{uO}8g~yky&)nBcxO?XIySG36*RiRH2wy541jG{u2%viiGr;$!lkd~&
zFJxSsLyLZ(erL`bC`fFjJ!p~R$FV-E-(TbC5!+h*sQn#&|F*&EehALs@`r!Ng7e@p
z=ZJTH9Ge?Qa`?$7t2j4Y_+0eC|E9YA>A#;{d)4zTza5>l5Rfl%fB?FOFavytI`ip0
z_S3bac?%F)^c(ftL9~s&?1|j75f)uW?0Y)bzWV8Srii<{_ILQhcLrAVTPP=U`0(%8
z%!T`NtXk)dHhw%?_u{j0q@0&Oy@PB1tUA<M{R%+h!YzLVkhv#HmAU=y;^V&zPDOZ_
ze>JLTAt+R7lI|B=0H)6GY4x1!TU5UV2yOe5`t2ZENkJm_W`v|m(D<Ir>3^%L&SFaN
zgXiiDr|%4`?#~b=bIt`<=ED6g(qr>LXn-c(SDy`L0M9;Q6oKd5`vUV}KXUe001_8(
zDM;qNP|VEjcNZW3WpFCO!>2|B0a=Ix1kgQ%8Q{CrJyI6~jP0;*0V4O4k)mx5<RFQ=
zXCdn%9_w@SPgT6@cw9q#%g#*xfdO-OKjr!Y`^c5E?miiGUiOOkeBBq9R>gDli{*2D
z{4GG{_?y+t#d{0_?rVt&Za2{U^8Z5QQiDe?tqKA%5eEpMdk8bYcd4UK@3Eh$9p)`S
z<o-<!x4p_il5c)G2zs;^!1_kIpKD(Fdd`3NBv0kz2L{C5-Tgb);#vCe?`YONM%hB%
zWTadF+{h`wCzrP7QZ4)gd?!JD^cEm<|Dq6?i}x4=96hIi+YL0o{J#*Hif#XabngQJ
z1t1O(fLtbr4){Ie2lZC~5YStI$o)+XuX>t-B;RKiA|62H8~A>%e&x&aK=>4oYp8yQ
zKfm}pC9CfKjxhqqAO0)dbCjk2rNKqG7gvgn??<ub{OLW~BNra=79ev!Qi#l@dk6yV
zsR+UC2Jj|vBVwsRefW3I5KtiE00GEla_9igjXAB{tMV2gIsHN%FUt^&F-X$AE$h63
zb0m)wIR2OaqqmO7H9QAlX7J+g*v$F+FEL!?qDx||?hQJa0dy@e1N7;>x~kv;;A?dk
zomN|4ehUy~?vcI~0GEId0`3b^6b{Bw(EjvaM=Uj{5C6^w0`eyg5P)1JhYt94kAp{Q
zWagqJzXeE6zq616c^HCZ?w*91D|eeC^8Flb(iBqrQVEYahv(?U-_fMH`yKJ{Bfi8K
z-F+r?-fS)R0o@nv8>@aPDa<_b4+1c^UyBf21};d3{LO0N_Cjx+fl96F!@mmx0R<!u
z5P)1JhYpnUpFh3Fev$w`2*}(Is^!rbf@Iy}vd&4y*5fHB!Cl8ICFD=?f#DbbPz!WF
z<baOJE}5b;l7IR?IuC>f5c7Bv9BSdO05FRP!drmC{X;nl8;}c9nDUHLxV_MGXCRc?
z{NK@100<}`aex52hcE+tw>t9`0NN2jcngrZ-&D(!(Fe&oUhB-2d(oZ4l|7ohbv%@c
zPx6kjFa9n7RCjm3m3VN;6ziTdsq<!Qxew@Gv~Q}Kz5<|mMCiT+DBOQgOyM&6`JasV
zBSqo%LXVw+P-^>={}=%R3P>CvfbJp80N=0fS+z3t#eWNsTlD9O&VRp~b#H6B_oB=C
zQ(jBI7N>-CJe2U99X9?>;LoCiOQu}SxZX0U^JZ(g59q#X-(1TRl4PdbfBd81pI3Bu
zPeu1-O;==Rq5DoRl<?H)FlDm?vjejOvjejOvjg932k^cCK)*MF89?a1Qk$N$@H+1g
zUvoO#@9rL`oB$mGa|W-1bMH$JwXYi=vroPsg-AIDnfX*)95p@8x|x<F*GDRQZd<wQ
z-q+RoSj8u!+i|A8P)^Ix{ByzX9(6<{&N5#2Klw7D(;(aahjhfPx>UM4^H8r}C^Nq7
zaoiQYA9`pea(DMi<pk)6M)n{$*Z*Vf>&A!L`5;8fG05`jS3a5^XXp7(tEJbsUdmJU
zy;;E!sAXsYDOJ=F;h^U%<GJdXngncw4!}zD2xfq2sWYF_hsO6QXY=>`!=Auh{O3b=
zuXQ!9s}CVJ9v)m@Yt^f>)?Dbp0d55xt@2nbKnIsYUp}h1G(FC`S&3{{qf~a=JwMXD
zuZvvL^~T#<XX*>VTZZOO>7$Ma2R&yQ&sE3NBw!<S09KkuFatzOoq4F&Uvjo^e~hss
zXWs&_ZlB)|xqGN*;=20EdEEN}vELq?r1lIwI9lH@I#}gFNRBRtzI;@1X?mQU=RYmh
z!ur~!bfbQqS;Y{iWoQ8@Q`8X=^0t{j=lTC?+ED&Iw@C9_02cm|+7Y<twl`V(2A$RT
zk=MLdwD0OecJ}uNvBt07P&tNp@9zh1s;(nTkCo96547%;dBLLTan{YMD&VR^m7SXx
zvF?3csE^g_dkvkbFC=Uknm?5kbwoJmIm>vidYLu}*a#hfmF5x50MSxs9_jT<PL}TR
zD*%x6766<Ff5#dEhi?stOXBBLYTK>Y!-r(gH?Mn5zTx&N=Jwz%*`wS&cMMMU;ZP)t
zPY~9ljAE5#oRFr+SvLcpfeVhfso)}nnt<Z!<VvHs+AojQeD~&Zu5b^*TZU?;^ifBI
zgPyaD=c;3B60i|E04vQSm;s`t&g^ra%RS{}`5wLk$OCuBTL9vA`uk89@zvHH=J8N6
zJvehcx7T2{etr~mhvEx#EZQT;0uI5qj2Zc;;?ndu>sHZcEA?h1YPbNUCZM=FwbCfA
z_RHfu%W~H}DE5r{bvakK2ltktnsWkok2)d}XBn@nj;Tq&M(6;nG>>2gP__6v`uk8<
z{Us;E{V~MD5I+bgj6*-gIEnKvxL9|eSY4k}`U6f^(7l!jug+HcJ_Iv4B^Y$a;tO;r
z+5^Y}j?P?07A;sbJ<iVapO&g#b5D<PW?e&;mZ1eqb;P`0NrZzf<GJdXngncw4!}zD
z2xfq2v9-h+&*Z-5Ty%e?J_I_x1t5ike};sab1t|t7w&J7uKVZNsPiYhTv7LsLp*p(
z<@*|q#V4bvJ7izFAG730WQGTC%!B&AVA1qAJI{YwscOvwJI0xnP4S3B-o&M`zeR?K
z#97Acs$)vw9V8<lfR*ME%mC3+XZE?z<X&<vy1UQwH1aTf3t+k5Lc+}PR{)s{_vcub
zuFosN5l8V+_ZnQ?!>cz`z7Np|7l5ccW?#A=vgSZ!hDUGAgZjQ;(eyaGa{lua%Dt|2
ze5@v*xH^;4D9-zp`ZKDd#c{TlyY?;ijI_>V3DTCKnsYFBSM_*~Bq?5XOihA2G6Div
zX&%805G{3PU*nnFQ_jWrYyJDsA?GVsp62lot9eL$WyNbWEzfuHjIVt=n1VL9y!w?v
z)8nk0Rh0=t?m@&b?FLkK#nlOwMseP+cOFVN!kL;wpq8PUDOJ=F;h^U%<GJdXngncw
z4!}zD2xfq2sWbcBXL4V1E^~i0qcFI{{~sa*e&EW}JYHk|*I=@9dx)mx`5u#iZ-<!U
zz+A$|ltY*tMLU88njUAD@`Xy`9LggXv=3GjP+Xl&X%y%EO8ptt(c(B;%U$~xdq!Gk
zvIJ?%P|Z1*yQ_LUN0Jn;I;JMU9T@=utTc~c28fnAv#+t?PWb;S<$NdIfAJZ~<2hGA
zoP%$HrXM3@8GK*ClzBY(AP>=C=Jx6h{vxLYGk1sr^LX@{qoLy9;0<OTMzmnj^f>Eg
zWzy3i*2#}#+zG1eimTHpjpDo?&miJDS{y6g2xqc{KrKTxQz})D=SY&`RmapMxFaJV
zfR*ME%mC3+XZCeB+`n3G>Hn*k^PP78+odEAd<8(9gU_Kb2k18mVy#*C5Qj+@2e01v
znsp?2@CJVomtf`=kNF{bn#W^wHXIGi3=6O6an{YMB(X@%*DA(GD!bz9OiH6T@7FsI
zr5oW)%^^_BP|cJo>WFaAbC&U3bxchHHbMtrrFjH1K(y4EeeQ<)vEdf~zsfn^Y4?Ap
zK4h=(ivtyx|Mj?_!EX}8TC-n1=blB)4|w@6oU^~eDaPPO_#PlC;JEto?{erXu5@G5
z^f<e6{+k*c!LyH8srorJ0mao3l}2&3->G9Y-(Jq-3QjFUHB;EABf>$?S;lkKF*OO;
z2pxcx<`K*Q(Nbsjxogh<lP4AT<h?ET#ZzZqKe<rG^)bdaT>2kGM$WzkfbOBxg!*3b
z@^9w$0H@-U5qysj6>wPm)xY3ggcemZ%tlR*vu<^Aik^F(dAav^Eo)SE#nlm%Msc;@
zspB%wjIidJaCgm_TtVG3RI`cal|+ObHRFA&V`>tx5jp@X%_EorqQ%zYM{PB~I;qvY
z1ZTs24b-_0e^S3fjOP%%3isy8%<)@*%$1YQ^LW6s0?p&K_=HsEq38}p26Q|E7I4S`
z3T08L-0+$nXXp7hZLSA-waUG$`4(ql1#!#J0+w)T?-Jpl_qB}Ys$*&run{@{E6pRA
z0ivbO>~mM#KVE6L^gj<Z+)Ipi=HqYFuMi@B1yDtM_8Gy^TL5tRi@u=qYJa`H#=Du@
zL-EPhaxe2e78%gt09e2=dl$+gs+<j)9%tQ(ahffht1tI0k9E7suDCji(kQO>J9S*<
znNeBu%=o+oXH;bhwBeEvTy}K#$Ptk^%XnRNOc5*qG6DivX&%805G{3PG3WnyCF}A(
zA<CTiKMLm5J@+4SZy*j|0bmrtqt8ej-U1Zv0Y%xGBTd{x@yRagdE!`P>*$!H6q$XV
zFqIdPrpMVG&VQa!xtFrHkeYzv>NrZHIPX{L%BYUEJkB2FuHzMZMpeu}8!idKWk+|9
z91)4LjMr7i6u|;mHIgss00C4jzK*63KYJxt_!M0Y_x>f3XWo8Ky(#XGp_Vds=-yYs
zOnwVcxK}R;m#w+sUW-q1cvqhj4n?+(4mnDZ+0O~Ka;P*t&brk>jx#}?QMvaAQrQ((
zM^qZcdB0LuMs>91arP*89k19ks$vG(a7hR*JGy)1h)A4eyskQ?2o}Jqk$g!92%u{5
zb<}<MD+sB=m*{DAUo&~;{g-yPKIeXwkZ+;BGS0sX0Oq08Y3qwTa}RJzKCk851L%$r
zZGvM81L47bPuR{TC^bFK&hw9jP#`T9-LU4HoQW6IEkg@v;&~+z4z`Tvs$*&rtQ*Of
zf&c=<khT^-s_Xk#5Efnf@bB2d$^WCUt$AzocOz*3b|Cx)K&1<B0l>YMnrwZMXYLhF
z$>%}*obUkACOD)p5FYK%328?_;((^d*@eKIdJCrs(QV74*wq9SS3Dw88pU}v6_#Kf
zZF#JGdpQ#|IJFGbOktyr2nRi98P8S6)FfCpk}m}T1W>j3I_heE3_?_xKm1o!KgDq8
zKmNOQZU1)QG1ONn-M0Xlc|e)%JiN)w?IC+br&m9GjX3L$S<-VqX5T12_~D^A`t0J;
z^f>GGl*o1x73af`{q8Ee;_8S>qd4!!l|@`fTOKRlUd}`fPAx+<Q`o2@!a>hj#&gv%
zH3`;@<V!&S0aPu%j;fkJD&12lg{%Hj==0D|!tJr<?n8YwTmGIP9p6w`HIJ5i&2fS$
z{##)W5og~+mh{{Y**A)het0a7v?B`X6Y(hHadshaPH^r$&XFH=BsBrW)!s^@xY}=z
zG0!UEybst?WeO(4B{4X1jvNsVdd@PQtBxsx1+Z!)U(x{r#E^C7e}1j1abtgF-Afq7
zhs?SASE4To-&Xg+16AaD$fJlY_mC<Lw%0g#4TGdRCPC-zn0$l$5C-%HV4-qNkF##}
zon@aSi?iZK?XI#buJ%?M#npa$jCocO+x?O`lPi!dLp4*_s3XEb&soNE)iE^*){W##
zK>z_%E$@H$cl%t;KPhwK%7=fedr7iO-=3+jZLWBDQ>k_yk+j@viV$oMaqtiZNq0zs
z&f6jR2Kg}z=ncR^<(eL6-70pLeUdEBiXXMR%C5NDTWJ(m`|UC2Sw(F3OXf_jK(-9k
zOktyr2nRi98P8S6)FfCpk}m}T1Q50O7k_tgHUEjS#L<U;t9weaOWz);uT8E8F5LCd
z48x$}9%2Y@dd<;Z{V3{=nd`kDqpz1AJoMKgJ1<nO>2cQ0K4;$Z9w)_*I=afPxY}E3
z6j%H0G3HrCT<w=YR+(BZg@G=O{Vj4tB+fEkR~=IX3t-hqzN7;LXln^C{*LBqeomPq
z_moEQM!GM?aJj*K?x9K%jxLI_^N4iCy~Zfs_K*WU_)*jyGS_=QL|-pIdg!lXc3!Al
z)8p(F&wrdKKl=XVd6&E5iZvpoQC#hJsWHzg;wn=h4VPTNQHaP9;h^U%<GJdXB3J;c
zM)D;cAV3URGN%`RTg_iNanQZwQM?!5uhwwE(VN&;DP1_ZrKYy4zOTjZ(L-=`;5Ef?
zw^ug|x?|yb(J}Zs^}&I39g?$;DlScrvu>s$+eN56({lIyYf~w%Rw<3*YQIa3%bYT*
zG6mXjNeC`Gx_jh^NStN7t~#a&7Qm{Jd`SlgP}P!O{2fiz{3|E+ocZu?bT3(U@%yvb
zS1GAKxOAC$B);ZegR3(>q!{k@;D$kWC|oZ(1Yf5<I*_hoa?Yz?#nSXRD^FmRd#7QZ
zX}SCUwZRovtCU7@wcn-2WlkAYnF4LNBm|co-92(dB+fEkR~=IX3t-hqzN7;LFtwBy
ze@9a_Kjy@avmgGA?kUSIet#7EDkk;kmM=4pWN5et?`vNE!y5q49b?$j9*=R24_wG0
zhRCa51=RF7>t;}8y4a8>8?o<Sn@(}HN@)~V`(0|R=4NsQLCa9h6e8+~aL{v>@mzIG
zO@ehJ`BD%-0Ir2z{7p4~Ag`<N34;Vm)n97U#m4tyU&UmvrJR7;uKK=KqF3*0UjD-y
z0M8v_*wY>laXpWm<`_dnOA}7h<E)!Olj}kRPd4JF&yM!ob^%dWM_h{6>b5hSrl>PC
za|i+xGr54H5RoIoLC;ynbJa0LumDz#<V!k0fEdzeHZT6BnqQGu{e~~9{tDUU!P`QL
z2UL?l+f}u<9$Yy7^zWmG|4-=AqtmaWU(bWB<q*Qem2M(xdYtV#ZA~@15w-(<<kw?6
zeALz6eQVhhf4hPc6`@q)lZppNlZhM=4tmZqo~w>2f(5W@Bwx}20>qFmvwiXBYW^u{
zn-Bl2dnvk$qvwSTuWb)&M(wRv7mh#u`{?2S6MpdM^y}c)^Jp_4LzuYIO+-zPvt6gH
zsb)9AcEFG3`Ok}Ld*fSf8Lz93sa=;<Bl(gJ5FmzZUi`V5|CF@Nhkw>R72U<rqt5V9
zw=nFtnru7m&;Dy2(h<j+>m57CL!I{Eo%8BffiykNF6FDzUusylz)wg|%2p6{wYONe
z5U<tQXjd485=@>{6gVm}azr@jIm>viI;IE~z^ajaNe2iJLpCq|T+PoV?Nfn3sQL@2
z%Y%EJ;uXbY&~{bstylMYUx#$Wq2_wW&heN_$oHdnjw{`S(eyaGl&>k^GDMzwxi5CW
z6d>2=y>l&{?&?>Vpvju146d{w)klPbUS}E4RmT*;0$4SYFX;dQY{)s99he=M9he=M
z9he>XemYRU^Dyr{=)D3U9OAo{&?(PR%Ywv@0mzu(`=kH=`Ira)5B$H`2mN09P=|9w
zNW~9F;rm`Yy0P<h|1DjiqS_ehuF1afNW8~)-!FT0#}hRxU!6ncZ8_ilQr(Lmmg~h?
z$|xgn&YT^<1yK$dJ@Tmg9{vrP_dkE=a(KUAmc6YQ@5{9B{SMAo-J-{$%M!Qc0S%~q
zy|FHV@@8NDH*@aa$ziDnc_qqy<MRWAah*b9&*chyop}sE#suq=9@<ttA-uVuhY*7v
z4&uAHqunz>?R1T=i`QJv*ZsG2iDQ$Vp_V+>`H^^U@4UHUp6+<!uq$7kL*;Eb-}W;5
zFMe3AFV0d%8G&==>;Nu^a=_@3N4<A<9RBqGfBC*A0CnbarsoyoW105j`*Zei^LQ)R
zBfp>POL1OP`v&gOdQ;wHi1ST@$9JlEsRwx_%KbX$KWx70A?(rWq5d-M=q6YHYR<v+
zwbUPx*VVr7PFG*S_fWHWEKZF#g}Srml>10L=iB5xAzqj7LJsBIaA671j4yaS59{%A
z_oZDUzaDY5oEZ+8i<TVFjxxJ(_s7A#^rzc~*yVj*mY9QG$)3x;dZr^+ZxgjxiVM-(
z_O|s=-dK^H=<_>G$ePLoVqf|3I_E!3ym|<8t{&<aaeq|nU(G#YAC-A{UG3}lTYUxJ
zW6kEVS_NlYT(#`CbRUW5e4D%{#Ov~1$f0~2&YpjFqRV2uyMG<}#h&YE%$zgBA#+&~
zduc~Y{k1&yJb<;=BlVh_HTNsVx3aIy$AqdyYOxd-QdYi!t4pA~!Cp=C?VTMeW1d&+
zD?j$1e;*@w67b^?8z79@hcI(~aIxO(r++te*U?AH*U>Z8@8%wS#w<wppsReH$$OhC
z`5p|3fm(!MTwFG|N-z>H_*QvOir3-0m_zwiTwDT7+n@NdCjM|YYZTTaXu{dg4px@~
z##njOb*CLqaK+Ue-deY8yf4R$p!YW|Qj4Xyl-T%2t}3BQR=>@0X5*lHP}V4;E1mx^
zb$)Q`{wTf^v@hb}pjLmIa}?)49q2=zRJy0$mRs^Y7KV{ngJ=$Qv$7A5#0$Pv-jm{W
z_%7y9z7<!Wf4rXi*88(<&vjgo#D$}u9jq=lHtX|v`)>BT87%oe^PBftv}~l01_Zso
zX^~ni#ihi?H*!@8RkC`D&TJiOK?oZA%8yq%|MAH&G$1~>v#(zM&5L+Er>@#V>XN%1
z+|ceBdVV-MBVENSqJy&qUy4^WVhOS_l$-~>_mOzX*W^7h-lp$L4&`e&*8|^Yv?88)
zPq`}`?Rt&8A#&;Dr#Aprmjik=h0pZzz1r>7Q2JE|%4(L4&*f=@VDP3Esl`$pvXXD&
zsuFCnW`fRKIW!U%H};huuXX;z<mtg3e)aNiKE=ml>Z(1WP2uxFJ&&k!ngh?Q-9JZX
z4!#txL~d6Aw$+t`ssbbNlCQ~oV!Tb?l^n{~aBix;)tGTQ^qxyk1@#(vL*&xIPfr6@
zmjik=h0pZjz1iyx(YoH3H}a<#>0LI#5P@2x7E5u+OumV$>NCmO={a-dP=7-3VPjwU
z@jB=KQJ(cMXL^VeKj!k%DfPoc>PbA|)jX2UOW+$~WT$jLcryATUXg@4%Fv?KrL)pJ
z8i@yA$_p+h;5+F+6Q_K^DcuI=bHrw*Y;KT_b$%wO*2t@4@ADh~<Y0BV_#k)s&Z|8R
zYV+5@%!%qX-j`<$g2@|gLvtyt;>ZkNaHc+!tetN#8wVGrmBqgDWB>X0UZFOCSZRpn
zVFKZ19+uwghbhxTjCaVs>5%&2F`Ci=w|k~MIn-|evQxSreWd7%ctz6ozu4?YJK|U1
zNIdw`o&l$Fk`89#lrK1?+u(eT*vyp84OY*!7pLB@K2PN0vp$d4@9jQ!dP$l%&*h!G
z;lp6bNN=(UCV#XI&85JKBQt!#nG!gem|8F!2Op*p#=i37b<V&1t}o1(AJWW^vA%pp
z!{`ughcK|`KkI_x7zF7d#Y?wCV3Ti(R}^jgi_M{p#WUJ47>P%|$V*(COxL&SOYxL$
zl?w&<ZlV%DSbJaZ?eYBMX+nNZ$N{rHk5})_E|2O3x`g5*RoH}<jL+psgJ5<~ZKLL9
z%l1nMTAp1@36xAtESQZ$h*C>qU-|J$=l@N7*%v0v4{73uSYJM(VRVePV;I=;pLIcT
z2!iyG;-%X$u*o;YD~eD@8P-UxXst2hk$B{byu_J2X#gD=!6{$jlx~#^Iea&f%n#DZ
zPEP}Njl4acf4uxB2dm2gvp$d4?)4srcF~X&_vNiTd;pe=^d6gFc1vxe=4Q+GD+pSi
zT}%ljnVeQI8;2OB634#s<8{t|Xh`)C=370)qL02noX}WpIOT_k&7Ogde50&vPj9Mx
zL%itXA445w*dn#0tqROYJmZ`48V*?qsQOlXO+2Mr;bK<bW8@DF4@Wz{zMC})=5YSA
z84gyL14f5DTD@00oV6>aAS-8zy%N^O`|_MYu(*e|ndT;&;)+{9spW;G5=t^TsbnUH
z9L37hLeQ=M{QHQ(k$@kEOaOL0gWf5^?5l?`-RdF4iboX(G{!i3(=o@3_wWcE`6hE@
z(&I6bxxNsu2%6p(Q$(Tonp_@_#52Arui;c$Ro{xQiKlccTukrVi9$ke!`aTS@43&h
z>+#WX*$fA(%K@`KkC*Rue^<Iy<DL8eOFvc)G}K0Vi%qb&g|?aICY$1lTR^Ghg{2aZ
zOiwA9$pKM}1FuHzG5*#MfSwcJ5aWfRhY9#wumITg3^K0>)8_}LduyH-LMZoeq?iAg
zD*OoG5W~563Ao!a`{etv*fQnsheX5oz}BYqY>YS*-_n{HiC6iCycMUCs`?gvt9VMc
z$fe}IpQs}?*Bx%WzN62n?@<mj9IP$}jOILAxtrbH>ekITRIQ`mn$LoME>9T*N$A>E
zH8&U)M_v(zmJ>@Q<YWsIN@j8>Vc3NhBSAXrmpuPr{`}z9{jqsk$VDIMCw}>?AMx_<
zwq2RWA^YUJ^UYVL{Qa0n__}!IsykmSKwT@C)g$pL-;lTBavHv?`WAhwcuKd(rKG-}
z$RC^QPB&iP(dX3nD2EviR+j@ta~>_<d2dVAs^Gr7nQxu7k=|hwByZQYs=2|aIP!`x
zw47KfAthUyPBN1Ng&`+ej0CTA{+l-d?(8qV{JYair!&`lIAlb7hIDzx$1E$`v%LH}
zE}IY8!QIz3LYBR8yi6|KN8;7KK5xybw5q-(-#VVsEpaKgZzIY`J^gZrzXK|L&ixMZ
zFJ<JBbHIA|aSrHl8D+M;Fa4csQTokaMitUWIEj>z4j4fb_3gcGt}{Oh&UnQ^{VahJ
za<YZ_X0tnG_abcRL1?RaxYqd(9iRu-lf{>RcU%u;8oAu#F(cYDq{|+MEPKz4y9?hV
znajI6A}jdl;w^6_5<Ah^@r19>+r)Fax)xR6l5ZVP=@vQkHv_E4%N_m<2wWDuzx#h*
z@}&$tat>#o{~vs>w*S=MZGP9QA6%mLdanDx5@n<}*aX?j9d!}({|WB@{$Jzg2aoUn
z&u}OqCwu=taw6Hh^o_<3|Ht*fUwDfTVEZ3ehjfu}k5LzbcrJkW5F{QZ5N_sS>8#V!
zfq#B{EJ1tE|FO6fbj-;Mb;y(}9Pm)=^;s$wLVmwdjbt%Uof{)a1Eup^Q;Q>NPWgIK
zU7yma97?ws4-L!!J$)$$99$NKv%cf`Upv>P-7C)%I5)-t>+{^uIFKt}9*T2)C(i$)
zc%G=wKkCbQ7tVh+FMMfnch&RXG|drj{<+OOc-5QVuJlkmd*A;bic3L<yyXgY%+x#W
z%VV+kqMo;LZ$8J{c0{G~T+gL^y<eSI=~ND-TZ)(J^BqkPJ^ixw{@)pkIjx81urw4f
za^7L-D(`8R6wGhEZ)R=A_jNO!f!<#e%pS}4u=%a|?Y)g3&H~9$LQb|Yk8Hk!0|kk{
z1{evv>;u^T=f1-n3HKOvA&TcHevtSv02vdk4tji<3Qge$_#HaG|2x9NfMckHigfYv
z#q&KFubx4@K*A?X=uBPk6)|gSF-MhG=~ND-3prK1RE%#Z>WbZ6{r=Ax-SM|?00Ng+
zh66?p;alL3{|_y_#4O2iaFk<fye~(Vpz^J`s*&O4P;r%CDgnv#JYY6YPzXg%IM)Dt
z9`Z_zz5hpE0={pL_QAaJ$L9w)9Qo&ZsVa9^4+)~?g#%o4DdI6Xx*L2QJ|bZ+2y<8$
zhgCgO*X)ReREs&Pyh^8XC|yXc;-#8=OHrO5gkR`&Cpu#8b2hq92iY7&?&-kla=;uT
zkLx>a>Dr{n3l7iml}P)vM!X;QyZKprX|`%)*!+sCyi*A!nVd&4t1kf91;-x1_CNO>
z=1925SXdZ(n1H_p3qaJ>k70vV4`KS%L-afNXonob57JpZ2Iw-z`!P%EJ9HE;y1+LK
zk)sZ<I1Y1Iv^qmt)U$NWhFD0nlEdUxI+a7|Qfd`1ntTIMU+88y-1!wguTikL2XAy-
zHp9W{a=_@7N6nqLbYs+ncA=Bgy0ymV@^nG4^%RV0WZEW*v#zNGN~Y#P=C)?zfO8FC
z`=9#`b0pkjtSbyXOu*lQ1t99`$FReyhcNr<A^IJCv}2Cp2kGhH5TM^j;W}hVeTR<X
zMHl#nA#&6K7RT!N7;%^oo(7D`43sW$MyCeKH|b1$N~dxt-AcS@@(o0Bp_}1s=U4c=
zM#179ywP#l3<s;r0i#zQ)pxU{Yva;bI^CBy^sQfOq)!9{TTj85g-P#y6=z*j37kyK
zgUn6MR*rKGVEdo@4s#^jW2`F-Jxsvgf(0Pzn#Zuk_`!vEOUER%fqHT9!O9zeFgV7D
z$1q6g6W8Q&MAik{AvmQ6JKuwHUggyJAqPz33>|zao`TS+9K<OdI0_7od|@UIx?uGE
zgvt-n(N3@Mw$IV%IeG&S9IP$}^zkTsrsLgiX=_^<NjEw=t!r((FV7YPTR+KABk}T}
zIO`lru*sTv7;{x~CC0f1u>H?{hdB~hi!k&RCg5+OU;tY_MvwJrcOf2|uK=R_=!2Cv
z0AY|0>LCnL`otjzfvgMEF*v0MJKrPu^uaKZWhqwl3>|!z!v_m=DhF{&2aa-pBVU+_
zgDx0-KcVu2bh6XafbDZMdXC-z1P80j0ew6QpXqR~wzRRW4wU0_c}L&+)<$}BO|bQo
z3^g+Cdpr$9y=`xb5lX0%)$=fBTXQYNxdyQP&wYnE5?G5c^ra=>Z=q6vRCjX=v!e&s
zgY}nxyaDi`aZZmhk>Q$Ljz~IhI(RXzYQ6{Myvl_+EGcM_Pu7v|Qux4=PUQfnbi^SC
zaqxu^TeaG(v5k<q_Tpg2=K<U2Xbc?Q01!v36GKJ<;ThChnr$0cxSF$5yVu6&@?=4<
z_F5WKbJ23e8UIj1ovfONG5`PV-O;t$xE27==^!O!I!FnWGNq&xrh}9~2`S;0kP_~b
zWJ#7}VGPgAn#tOHfP5q!%iA2_5+L20hk?!e`nACO^HkhxcXMW9Gp(&QmX)R<v6&m`
zrn`!r>5!YN5ZQnFuSFkI)iS-rtz}m=7SoTHJT8WBaZ0@m$9o$mKHRS7ZZ`7a`OK4M
z<C@qkb&WOSW^1Xj*|;Y5|Ky8vqiD?$+Pih#JSOZuPLzbp=_*_8?dC4sc0OuvCx2aE
z(r+h^oy}|Orn#LrkJJ-0e@va7nm?9*(BJH=KaQ@uU2{JyuBz5`Z4C90;G1#Fi3r!t
z36HVn5Yv#jog3+<ySi8FNu;?-NiXZvOA3VTTWX}MTi8syYGP~e+FHhi?Ms-@T*l*F
z_OZ@;+?t=LG_N_u%}L&-oQ>;Z+bK2Djhn3{$7bWYSkBnRj^+z<qiJ1K^Yot|2|DLo
z^Ghs0P;{|!f=a?=yKFVL+q-nr`M9~A-WAW7pzr*;K2|i%anAkiirbLSVqJ!?gvrj@
z;~2WvHMfIu&RX`h(VeG!8qTl(kIf5!xS(|K%~e|Dt=tz%*@x|>NH1YCt+|QigX?aV
zaUpaI6Pn9-yti)Fk&Wi;jl=8Nj2%yo>*kJ$o9%AgY#%u`8`s5h&MtN|Uzi(B>#~}s
z|NKzUIp>mJV!80USUEu@;j&$}*4xQldhI;Y+9bHHuj#k*`_AU2b<^C=x)-Uq4f!nA
z%@0eM?W{SDp}VfR8B#OWvagNqJmu4He*I71>r2PpUd3fz%3UGerW!Ag1Nk6sOZ`8r
z+ca*-7gBDHH|?VW9ceVDuOD7d<`RBt+%UIK+-wcwX8TC7*|;H=vJSDM`4+~yuJy0~
zK%D**Xr0ST{v1n{Kg8m+a48A<trFe-o@MPh+!(83sGdGg-+5ghDw<|LV|Od#cI2~I
zn<Ok@y0i5-hVHWFbwHD@mVIrs=V?)oXQ6hNP(CwhGv>>Uc$3oC#ysEn?*B)eln%YU
ziosjVR4%s|>7M>mL2qJl*m$`tE4!6k`F69uw2yJ}?sj+n+L7s4*1%KahPi8&p*4(~
z?IXoz<AzwuJj9OXTNvxw*1!HEbFA&wIX~5xSStM?R?p6oa@nlub7Oj!9tS<z9+G@r
zU(|1B_MPi1>-Kv9+gW!b8Mh;!#o7d63C(_+o_ZWZcV2VtZl*MKT?@QDPx&N-^HjS_
zD8%9=Z8nX#T{}`^o-ZBpaFvdHxayz&m&$Md`j5M~#rx4M?`-ELjvMpE)z(AGjmu7s
zc~^G=*CFXx*3G-`0L;d1Vr`zLy%{%KONh<JZDJwsCU!JmVyp`*&*qsl6nEoXJ4-Bd
zZ(_^LlCa;((fM7vOgVfzq#4SmPxBG)L;3Qxd8BBXrTfTj!X3zGu{PJo=bfFE;~2Zs
znoDP!DYR={*G4%f`OKx8W?T&~iE?+IwA`eg*pc^Dr_{#gkN-rF4t%)kpZ=Fu{_S7?
zaTk|Z!v~5sjvMpEMb>5I#x1Y=_|0$z%fxgnWh34`F>W)r&C|3u<7WE^vDvsyEact9
zj^^v@zdV~~(oo%vbL}j#(7lN*Gs}_oTTArLUAjy;e%mLyuCMC1DX#H8lrQClb=-XJ
z!~Z42a)%hm6ZjCT6GG$qd2eU=IL7X{=Kc`ZbhYejqnwj`=F-ji^*?*9A0M~U!LL`_
zr|U~7;dS{-D+D&NaZKC}gD&l1<zw4~F0;wIvFYiWRL=2n<AeB}-5=b?$LBWREH-<D
znyq;@H!`lP{4;K@mp?;K*{V<VVR(*(?lPgqQddd1j4NBq?Q`Gw>*w}cu{vs`E$bRA
zjlY^hyfzOMO|uL=bo&tvKKh}vSTo|U&$~NYj$`bOYrbby%f2>x|KeV|mq55)R+Khl
zGdJQ*>Zy%+zOm5b8soatoo&6^K3(5J4KKT1uR~xHTYJ}57<7y4>MoP@-PS1wTaz-J
zJeA&g0gw*2-_c+9&v8F+I`^X=6SijgX0h$l|9s7}xu4;(?)dMxxnBMbJ!Pvt)ra9Z
z7P`xX8cSUz;WDmlEw@j7;kTbUZiT|Bp|4{-)}GM#x{eE0Uu##>tg$1Fa=&lK&SFjf
z*U$SqOUE&G$2Fg5)htDm009C72oU%$3Vb%NXX5(D?N^Q|zot7QRe60_Q=XD^InbQ*
z_z~7c9G_w&fq%8Y&(Ck|`o1;qgS7Fd+Gp0u&zdYZ*7s?iTGM85O}y#38z@UTYYBIE
zq=UdeByj!w-tOu9_-_Yo=h1elli{Vs(dpf|YfY9L*RK2Zjhgs)in5fmmT-K}MiO|B
zz~%GTmo@X3Uq}8OeH}k<l{z`2{x3CIZd~U3d^%baAD>y4a@G=#@7YKKZxXnD{#Y~r
zIPx*Nb@-iDsgpD6fBXl#+ns$rAHL;f9-o;foVSGIyEc-*dj!7y`uve!0et!K`Rm&W
zdpWiK!}^-bDeGGfu_Qizz{@&LT*C2P8%f|z0-+9meGg^-?R#jPQU2?D_NT14)M>1~
zbJmxal@lLdWUBvDmvDUFMiO|Fz_(kvV~4BTDdm51{r_?L`ikR{xNJSXjZ7yIJ>HoS
z1pX<3%QgIK*w5=(?`xHZlT-fwj|L~NZ)J^%*R4mb6Nw(}N<V>riNO6DxmOQI>VJ34
zZs*AI{QAZWwh0iJ3+%l{j%7M~J$$5mcgO5@_Un9QeSHR71PI&_C~pqVyYIYZc%*!H
z$Lw}?OWA!7FQxH*T?b}^z;S_4UgaML=aZ)YWO$@}cgO5@whPJSf9G%bC5;DwR!p0~
zae?l`LGx*oj|?3t-`z30o$WGid1M<~+qv_$MzjbJI4sbAICwto%l0F~N6L41%x-71
zj7u5V#F}R5y)-dR0t5~U3||hK&)R-vsMOt^m*pwV?wH-qW);^mu!%Lz(tB-Ungj?O
z6d1l7JfF4w%5bT>x7YuJ@9>s&{G2ablmLM}fnz_mu^CnB?(OyeU`G$S_l{g20Ro%K
zo{}6IHP+&|-DTeGY(_p9_n^B;q)C9lM&QNQ|C>A^FxKL@-ObtUY({$h-(YAGI9cGu
z_y1<A7)yHG?&j=vHY2ZPU=wSarPup^4T>g#qXIAf``;{msm1c1z_{Jb+3jpbUdq5G
z)-+4+rHN@0AaGRRwa@>|vX@#swf^gm1I_kW|K_;OdqoKl*cW*1^S`s4@2P*E|D9Qj
z5jaU86j=Ikpn1AG-~X8neoy`XksknNd(6-OKK?()5hZXy;H>Z0zN!B6j|0sQB<^$e
z{p9&*Q33?E0_Xg`#=Gi&`Ej7_s$=Bd?~}|&ixMEPD-a(FUM#Ay{&)BDYUhjVdwfXu
z9|K=qs(A$dV*<;c0x!O({s-#+<@G%}q`kj^{$uCRatXXBP~HlB;!VG1Pw%h)@^1lg
zPtU#Sm)3TF%I<rN%^_+M_<aRJ6_lR>n*Qj|gCl=?{kP@+<O9I6wtEi%CGmp;)J@=b
z7U(_-H2Kk=2gm;Q`oGmZq{cHJ`pGlzOeu*U9H4Fjzp+67QQ$AzAN_f7?6o={jC-~F
zQg@IWm*bhr)ajI(_;@}e3H)vX!&iZ}Kl=0FfJ=>iJ{b3E_qFaIH?GGsm8sJyHSzI$
zMiThl1ct8yU!MJsf=i9PtN!a(fy<7b_|c!;DK+u$c19f!f#U+hk9_>=>c7<Rdi%_=
z<yo^SHQ}sNi~s=w1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+
s009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+z?lO71H)wrhX4Qo

delta 15685
zcmeHOYj{-Exz3)<S~Iy!GLyOHKH(;WOM;4m1R1E5TOin{wA!i!ZNwYI%h9&lDoQP-
zr&z{)<ZEdq(5h8L0qt|9rAmNmk5!bUmTFZ<uqi5t!PCPz#R_xYZ|yyMF1f(aKmFl6
z55s29%=+H<yRUC;qRS)E<&ovT)Z!CmB%YHKkLZhU^|~WDX|D)*?$fGi=SdNWh9}Xn
z6`EZ?wSS&@!(-f7pP#4K%_*Q$F1a)s9?3WJXzLuUJl1uQ7PiqFZb!%#Z>NPqj->7<
zG$-|Ha^Bf2R&5&<iMQwEw8t}W0(8c`2#udD!~YEOYMWGRVx<YvTcQLfm4lDSbD35j
z4No~6CRHYrKDd>HG%kusO;${N$_6pmV4lrsE@sehr_sPiQBQW}O)UK-ZLY00$6757
zv8kn$dy2!5v*N&l3X;|}XcSq3l>(!l>@1y1!tf{qatdP0($+o2Vdz<ND6|&J$uUS2
zS|spM)RTpYTlFmm@?&jxYHKvcQ^Id{ef7^*QMg_DDg3MqrqUYJ$rP<&XHIKC*?~pz
zEtb}-i!D@hyA~))A)lb1Z_-!fv+#`p0cd+!mM?cvd~ej_O;^AmDo_d-{?9Amds-{i
z+@b}Bcg0&R`qo970um~4DitUNWZ_ts8yU9wLG8=2uKC(g&0g0VZ;E<+S`*bYIQlRj
zkh@|&;>?++3BFbPP_wo6Qq6B<Xn0$^H<NP);RIzAn|6Fep5ojQXIKX^#D1XJs~7E!
zv*5CkZSnZtMb(y;A%*Kh`iqf}_LTI}^DoJKnlepRn4GJWCiR;aR_oQld<2zKpVB)r
zw*Q;jYqr9ioJCu@xH5KaSwx)=*?hJxD7kD{Q&$|Xq=-I56IAVyII3!NzL!qkuZ=K8
z&2#BTzcifxVb|Ax#mh2j|NUB6U)nuNU)DX6t7sbGJGDJpbwVaf?rKWP<}2W%kDt@3
ziwwBthbqPRg8hNZ9#y4Y{G$by*WDQFx=;JP=FZ7!v$UVENJTU}B-D>kWtbErygJbl
zp*7b_x4!pKK9pV+4LcJW%8%{1Ups8`VEI(9GJFjJ!kTj2Kwol(Z$&+g89u#nn4$1f
z8xwK<`BXOF*Iv^6IZg52x>U^?K{Y8CY!F-iK8}U^5vn;V&!YujkaZL;5L5M2XCTR&
z(<eb7aje!Z0<DAHvdkYzMWMth2y;(M>Rd%ChQu(_=0g@tU5BstQq3yOBoxRbq>N?|
ziiXRwlBI2-ol)u2wnRM_7`7=(w~eVgh#*gaXiSa!z6wia!KKX0MXfR#t}rYSN(ZiB
zbHuSFqT$jr2EI;v&}M+GNCj(|S?PRsR?8UxVZdY#GT|iwhEBVpk(3Blyho4s_+nk_
zv=S3(RT62sjHqYU*{y3pu1G?*04E^_&t_?l2|1jAoC4VF*=C;Ix(4K`1mqOJNyvfI
zE}d^B0M_zI*0S4#Jj8&U23SF+jkC4H4hQ?7zOLKvq6Xfufj13p0xyM&bAy8iMW2;-
zXdWA#J6jIr?Uj$N5wa{A9+y3BUPnWM8v=a13rx=$h}ZkH)7vQn-O8@tY4~HqpWIm2
z@9b}`BkeX(K^q<uf_|`Dl<3Vzge$NqDXUJuWo`wnc~BJ4*L%ek(eQ9BOdEHJfNfKN
zG`p=_tD~n{gb^BRuMw?Umwv3LoQ^yof+buO;~y*pdh^vmi#s%*UO&r8jU#12?CopB
zI~v{8i`756TI5B;4qNSUJvzJ0w(tWw)*{ZqKWXU^aV_MQH7D1JAFs6_LV&(&f}d{g
z&;->lmj$-twF=qVC8Cv{t#^1H{A<2#Uw%TgWaK~i0Bi5d|3&8#VYB7$(>o89x&xbl
z(ppv=W*oWs`{LBO+2PaIEgT*Vf1#o+1w?#be&Xx+C00GSLJJig7KxiV_(Bre%<+zD
z%e7}x>y4reNcD{eLLQ|f{L0kQCw>d7P(<m{o28F#>d^9H$6CdIX%t&3&UY{TfWL1E
zYXXUKy5TRv1rtOUjsseHWSJ<peXx*jS*C^1gTA#)jG!U6U~Zr$0bGu#wG~Tk3K+|n
z0JbtJ+Hz31>GFS*g_h)(G_tJ_A;Vo({XzI_flV~EOVsimJ1y;ELk+Wq<Fv6;R6x&I
zCqRF`Rk&#TL!z9%`bB9v%1}8AC_nBA())LdQiiy4KbBckC?z#bl#bY2w}}Igwo(My
z1-4Drf?k7f3T6K%Y&ZV=hPTD_kX+Nuk&^OCxQTfSV%Y!O@lX5-wc!)E3mcBUvOpS&
zSqpT^ah%3#@Es^H=uK$JveaS0ipqtVfAQMal_SyC$y&gtJjsxmkkhJnz{TKAcqQ)T
zv;jZbrmxY@?-ZUA)0>Rn;T`IW&i55t?K^*9KnW3?DXB@i`7XqxK_lSfc(Y!{Vr%pb
z2TGj@WzeO|rB8qAKq2)?F)G&al6|g5Z5`SI>fNaY>6O)@Sg$<d<Pe<vx|j?*nJD88
zh_J`+7L~A<9zEoxx9%3_us@|jxkrrU09(;1Cevl#k*@v(tz97kh7*1FUEzTf>7FBA
zBodgplpg+*aM04d>`Fs(!*Tf1K~Yu8L=yTpNEz>SwDm<e^%a-MO6AnB-E4PP>Njvr
z2bUq(j5zRW9c;}~`e?l<rsrQ37sQH}i%rVcyrs3rGk&Tx3D0AM)c6W~74Ej=6>$^L
zjcjIHX8b^DIqe?gzWiNLZ3KIeu85>S^|z&cRjsFzhL4p9xAf80l@JQMZ~48r#Dx0m
zRicEm)~>nU(iC4amNINF4y&NCv&YdlUK1lYvs2_w(L#@WUw9|5vt@`@>P@|u$Yka{
zGlkOTSCOt4j7M$bEW!$>E874F$qa)^_6?+C8<H3Yjh*#D@7xC)eI!L~s<*{fi;-|L
zum~VGA#g(Een5EOZykrwRbE3sdq9jTNi81=NE7XUKn!QMOIn;NQM+i#E@ULAP5Yh}
z(6$;uy?GZ_47301)s10#uT@N-HNO=zu)TATyH?E9I}YSA5g#oWEzYAKp0GDi-E?gv
z{V*y<qt@~M(x@#+y+dhkJM!Pk-NHlR2Vv8|rl@B!e-&j>zpCdVdgnnTO%|$em@nw7
zm&iPY2!)aduhleD$EsaaJzbl~Qth-w$AYMq9(wM1EC}r_?k>v$r?v(o6%Kx|_S2zT
zMY&$tSYz{lqEZoit9z{&$H9z-p~1h#N)6TpzOwTc;nNrQTv%czM#~FeQ{*7}=w~9|
zG_0Wu<`QVNzLvPi$x&YEr~3{d-MbsKDuIs<3${goop|%F#kgEJT9zbj$h7QRMrhFX
zAB%=uNJmimd`3<+5SX0W*!Ro1IB9$AQ-b2K#Wit(ALUSVlkn1`>%_zoBgZCKaGFJa
z^U;T!9Zn`-mUd-9XqMxS6`TK`Vx-=E$Q2ET?D*Ci<tqU$C<!tAw_3HCgP~lg+?*wM
z`r*UKmVaF){IvaHaV?w&<Wz&yx!ra#-L)Phxa!lor$s(Jx?WsW)Md(N(MHz0fM&K~
zm3!aCDpk)2zrJB%_1Z~xCq3OJYT@NTR|ZGbYi3nt;d{3DR*G&^3#bZ;2&u8xL8!`>
z-}wWORsRf`7YTlGFU~1ybGNeEAH)3@7sz6SOij-e<o&izQLT@gCF!bR(&1DJ<LZan
zilD93nxJq$UG;O}2l?=H?ZVu6yhAls@7*nY^qZ$ta%YR_t=)xL;Lk-PfOULk?evoU
z7@}A8MofifKZ&{uLoa?(jOGZMHp&qR`aek)a6`|kXwWXi5Iy>sn8T}m{FtcWg2U|w
zVXg2OaoFHxy{q4*Ixjvk)@xHF>B(P+5Xa0DPe8iS3}6oB%7S0&5_7Xdz<mtcO*N7}
zx^oliG{^P<FKyT)E}p1rgT;j}GTRrv*JcpnOZ75YR5ezo>UJ)QMZacnBvgnULva?o
z`}oJC6d$s6sl36JNA)%5pdtA3>u|ufZb=)S7E@7-x}O%qyk-<7uyZIWAFFPx>c`1d
zwo4BPKU<BRI6JSTC)>nWBPKJ$mQz~IbnO!Q<}+dhzOvyN@dbLV6~TYG4}E&Uv!am8
z&B0bNp5{D@!0h6$Uyz<c{H#?3{E%4jR<T0UPMfzb$jEsd3JAj)C2OD?!@|Zp_HK)N
zeOj{~?wK0mQj*l2x>AXeE_GuwQxFp_t|&!~^@C?}#@{#nC)%S>mY&hK9L+pU87pC(
zS54PGU!o#7g>L>ny7y(fpW3#IX?|3Y46@v-Z`>~`^Aq>s4DQBpnUxunu|6fdgf)}c
z(`L!ApRVF+Z&w0?@pn!REI59;yeJxW(KmJ==W*jjYhJcXYTF^M4H>SV{v+l8p2l$*
zEf9*IYL&d<6xFCp?!>t7agUH8)nUczUl12+r)$$%doH2!-J-E1bqt{v+)Cfvts1TB
zo+Z?5mx2m@J7^0TA@rEOXxQjf)!3o=Q&C7CV!LO6IbGALFYB2>t^bb9zV{9}Y(nkv
zOp?%KS_tIipg!PbeNW9Z0*DEKeyACko6eU-s>YekQg$tTGz?O!(nvYDNTB`>jn(fF
z6E(9*Qh6X_|FBGJqaAn1^SNiT02&u?@@rp4|EXpnL9LD9=C<W<qZT~#UC}b$u~$;$
zHk(WJiBP%Za>z||Ec~XJt#Kn0FcVOptynYi72{L=Oeq7)m~kkq9c$N%Fs=EPCTT}V
zR_g6db91{C(hxT|s%-VMpS+G|7a7=S$n9E*>6?EP7in}uCk761+_deh&Mw({j|kF%
zJ@#rf>nJj`X1uHR#!GknL7an@rmE+9c1}Cpvle--mC&3Z+ERDY5#DGI<3kq7$Hmn9
zv~W`M71EW0gtf*^a~9iq1Lj=@%&HRAm88G~2&5}tF(+-yLDAfxn+(A{0UPdVr-C=2
z0i?FG8DvDL`X>?tjY1}p2jkn>HIW3y^S)|<-ur$x-{5`WV8ax`Fl8D|{jnIOrZ6=2
zEiuGnbV?Q<ZcS$UixXXyx^JK^rJo)aPJMXK)wVXZ&)^OYBu_U$x<UDFG;EAaxlq6a
z_g*jkYO5GNVGkro92{V1MHpI5P`Kg8)1u;BTJpLmG8zru;+}d+)aV;#OM3W#xWU%N
z;K~VIaw(UXv@zNe<*NxSw7C;I?%Lb6goE^mDH<(3D9X-caL4BrYRp$gCttQ3^A!xX
z_yw`b`djIace$U?H#Xf!&cBEYPd{56%QHv}wu_Xh>BIG+AXeNXuF~MR-1B5H(=-8M
z9?Aei#fce=f(WnW$v}~jRSmy|J)g!F2u`c;zK4O7N?T_%!Ugc_lfZd)N1ab-rLpk)
zVm{|iYxK%#z#J13#>5vR&x|zuaY0msi>%Jw&?5tKSsP+b?R4*9v64Ej5e52<-MnAy
zZ?2s>`!=eS3j86MM5cON&za5B3G?*x87KTN5*77a)(hDGcFiqEmwTxW|LZ;^>8Brw
zn=oead|1V!jNs{p{(MG&V_@?v0mF>~8oNrHzAe1lVOSp?b_qo8M6tDV&hAHEHRV=f
zV$}P06!3Q4(-`1O4;4Kn9n}4Zqk!h05CJ~*yUkV>yXS-$W20x+NMA`rc|pn>PDG|B
zrScZN@XTy+5xxB>>iFoB5c}qTAT|3=iqX`4nRIci^HH)l_O+8@rj5Vnu|se>eet9i
zg>7q69#W($3kRi6E*N&d!}riJ90)Bqfro*-jeh*2I9iQyoF<2YGX^up=_s|I6k}s}
zWax-S&;CgI`+ex|b4fH@XVjP@Es71H@7O_cjF83j#!qqO6>_X1i@lMeigT%1plX+H
z$vQTl0!$_QIix7M{B;cS)mAApgH=(k%MqZDowA0B2@rz;p+%<UqWO6^caG?n9`UIs
z3llcoAOl4#&w8n^M8owKtIx4b-Pz~+#t`i<1k<rDNdMC#xGHhs^q&o^V8yO>$=P6B
zD2tsD79R)aFS+D7X`I=>>M$hR@`>T16!7r>b|ZG1G?aF4W6;t^0nPIO5y05V-hfFb
zj2tk{?U8Km$7Qfk!D489{)o}lj1;B7)J5c8fnydvYZ<Kn0Fnbf2pi{@1+;H7t~`D@
zo@EzUc<iwjzx+J3;gd!A5hW2Z&`w|W%Mq#in9g%nUpD<%4c*ux^T>S?!vBW$Olzl&
zbF?Z;avts6ikPpfbhsFjpq5GUf!N<%@>kdyv;b_7-`?16i&*5Rvqz{wp8}eE>)W(~
zdshX1UsnkXfBYq`+?99^1cLGmCupveb3wa;oq~4#QkG?iV*69^O5yzHdi};4avq2D
z{|U=-Xkfz}4bR4yvEiWhUr8s``5lD}OzLa9-TaVEic8eMCG=_x>~Fdf0UW#%=QTg-
zE{cYYCos5RPUqNH4&mg}S&xsac~Ud#DzRUp_F-}Y)enc-Uw;muiihK$knFG^4T_-~
zLpdmG=X0_=4FzVv71X_hXYs4G$^Gmj`3MQ6AVhgbA@%r3SxQG9apdVgK2(UgUXdkT
z(2kuj*EoSPz_9csy#pvj($a-s9b5>7T;H2i2tyfo3m%|(;~f(Q+`l&+5P3o!qwqYL
z4vqoXS6=|wqOtNq1|%$`gF(adb6iHK{RLT(39fw5X-IW6JbHjximRCyH8yKTD-@vo
zzXIU@*2^GkGhbgfzd)~QbQ>7AzXbQ4G6fIQ0k@N8jdG-=n(>@!+9zucS~d?~49>$!
zb1>92LJe9hr><u(olT6CLYxbD)>ITu;xOArKfQZCe*W4X0C7!_lhi|Xri(ji%A*5F
zR$rqXdvFN$_7x6n&c`H$o0B*8r;AgkayutVmr;Z;INA@P|2L5zrrnPebL0253WNG@
zIxqoi7z;-N^*mbjt@JFLpzfsB8Tevw29_y^+mqDGN(Y>2QrBk3#Aw)?HU{0<AdA$5
zbhek?UJi{PZIJo4_I5O-UMid-C)*;4h#v$EzC_Re5y#@`Sm?lG7`PgP<T7f$Szb8k
z+!@hty{uz$>clxO9gWCyY44j7eT<JQa~}P<feQiMGD+^>sYU}Q9-qoS+|FD7&^snb
z<FE$<ob#+?d>$2FhqJcJ|AEOPJjpo#fsCm>hX13|hyPyXQ5I@kAe;Z=9XVXD@9`P7
zg^iSL@qZIw(OAn2u2?K{AQP6-{`LHX!)nk=KNrCxk*P0)(!N<Zw|uQzjyCn8TsF_}
z&aIDY>apiBv7NIM5i^M2mJV`$gysE%A3(j>D2sUmc&bMZ#nGKlMKy<JIs0|33glSJ
zJd^u?ksBK1>@JU*m&-|moc-~m8+qhvfH|R>Hy6km^y>R^tf?A@)oqw5ZrkIUdM4h~
z4=R-$Ec=D}o{C0jb(E(iq%~-MX0IKUHArKMeI6(HGG;%9**jt_3&EbH^q>AQXmAXC
zPP|&17Y&!w?#tv?XwNZOXOfb|rb8%BoVYih5Y=W&wlGnD2dI5K*BX-TdHEvReFXxY
zyJP+NrXgqx+*eA~7K~$>GS+gXL34nzujVNj-Df;Za4~W;w!&z%xrx9+F8#TtVcho=
z&?|qJ<C!nIo&r48Z@wZlb+Yk)hhRXa+J8S3!q*3X<YFqK8dC#PG>8>}*iaDjv`E$Q
z)iWLuYiVKc?WeoTHO^ELPm(B8RZ>%@ELTM6$aS*rp_d(ad{pHwyGPl~BEnXSOvd9U
zNLrw>R8bkEqs5kf&_`-+puInl9^UDKOEI_%Q=L5Zx}|F6&t+70+V;2%#O_)u>#;{2
zux+=_t_FVEXgaH}+1UOmyKTqLJwm^AIwpg#zul<&DQubVw8?nxA(ihy`x$=D4U?FR
z=dY$IlNmA<$>*9zsO=l|I;Gmbou3Pdv<(}z*j=|Pj)U&m?B*~x`FxqOVl~f)LWVn@
z?5PE<7u*S2Fk%`hrqy`@<1(hjZ)#^0jE1kXS_2m?y;hb)!*2c<|DonRCvCY?e%jyu
zHRZs5uxIgZ;yF;rFf<-BReSS5pQkL*dGU7n0$T(sndk_`zWr@E4rU*4bFMr(adeaU
zI3gOodC7?n?Y|69*?-zI2FiIcWGbhRYM>mw=DjjAF@0OPc;<Y9M^*hii>bt6b`vyM
zp6aO$wPSrV?^%E01}IJhifwDy==Uq5_dk5u<~i57Ej;wIKcDH~GavZO2mVEUV6Ef-
E0UUB=5dZ)H

diff --git a/src/font/sprite/underline.zig b/src/font/sprite/underline.zig
index 29bf7b251..cd580d141 100644
--- a/src/font/sprite/underline.zig
+++ b/src/font/sprite/underline.zig
@@ -70,8 +70,8 @@ fn drawSingle(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset {
     canvas.rect(.{
         .x = 0,
         .y = 0,
-        .width = width,
-        .height = thickness,
+        .width = @floatFromInt(width),
+        .height = @floatFromInt(thickness),
     }, .on);
 
     const offset_y: i32 = 0;
@@ -91,15 +91,15 @@ fn drawDouble(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset {
     canvas.rect(.{
         .x = 0,
         .y = 0,
-        .width = width,
-        .height = thickness,
+        .width = @floatFromInt(width),
+        .height = @floatFromInt(thickness),
     }, .on);
 
     canvas.rect(.{
         .x = 0,
-        .y = @intCast(thickness + gap),
-        .width = width,
-        .height = thickness,
+        .y = @floatFromInt(thickness * 2),
+        .width = @floatFromInt(width),
+        .height = @floatFromInt(thickness),
     }, .on);
 
     const offset_y: i32 = -@as(i32, @intCast(thickness));
@@ -121,10 +121,10 @@ fn drawDotted(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset {
         const x = @min(i * (dot_width + gap_width), width - 1);
         const rect_width = @min(width - x, dot_width);
         canvas.rect(.{
-            .x = @intCast(x),
+            .x = @floatFromInt(x),
             .y = 0,
-            .width = rect_width,
-            .height = thickness,
+            .width = @floatFromInt(rect_width),
+            .height = @floatFromInt(thickness),
         }, .on);
     }
 
@@ -146,10 +146,10 @@ fn drawDashed(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset {
         const x = @min(i * dash_width, width - 1);
         const rect_width = @min(width - x, dash_width);
         canvas.rect(.{
-            .x = @intCast(x),
+            .x = @floatFromInt(x),
             .y = 0,
-            .width = rect_width,
-            .height = thickness,
+            .width = @floatFromInt(rect_width),
+            .height = @floatFromInt(thickness),
         }, .on);
     }
 
diff --git a/vendor/pixman/.editorconfig b/vendor/pixman/.editorconfig
deleted file mode 100644
index b98bc65a7..000000000
--- a/vendor/pixman/.editorconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-# To use this config on you editor, follow the instructions at:
-# http://editorconfig.org
-
-root = true
-
-[*]
-tab_width = 8
-
-[Makefile.*]
-indent_style = tab
-
-[meson.build,meson_options.txt]
-indent_style = space
-indent_size = 2
diff --git a/vendor/pixman/.gitignore b/vendor/pixman/.gitignore
deleted file mode 100644
index 046b161bc..000000000
--- a/vendor/pixman/.gitignore
+++ /dev/null
@@ -1,56 +0,0 @@
-Makefile
-Makefile.in
-.deps
-.libs
-.msg
-*.pc
-*.lo
-*.la
-*.a
-*.o
-*~
-aclocal.m4
-autom4te.cache
-compile
-config.guess
-config.log
-config.status
-config.sub
-configure
-depcomp
-install-sh
-libtool
-ltmain.sh
-missing
-stamp-h?
-config.h
-config.h.in
-.*.swp
-demos/*-test
-demos/checkerboard
-demos/clip-in
-demos/linear-gradient
-demos/quad2quad
-demos/scale
-demos/dither
-pixman/pixman-srgb.c
-pixman/pixman-version.h
-test/*-test
-test/affine-bench
-test/alpha-loop
-test/alphamap
-test/check-formats
-test/clip-in
-test/composite
-test/infinite-loop
-test/lowlevel-blt-bench
-test/radial-invalid
-test/region-translate
-test/scaling-bench
-test/trap-crasher
-*.pdb
-*.dll
-*.lib
-*.ilk
-*.obj
-*.exe
diff --git a/vendor/pixman/.gitlab-ci.yml b/vendor/pixman/.gitlab-ci.yml
deleted file mode 100644
index 25c73df12..000000000
--- a/vendor/pixman/.gitlab-ci.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-image: fedora:28
-
-autotools-build:
-    script:
-    - dnf -y install dnf-plugins-core
-    - dnf -y groupinstall buildsys-build
-    - dnf -y builddep pixman
-    - ./autogen.sh
-    - make -sj4 check
-
-meson-build:
-    script:
-    - dnf -y install dnf-plugins-core
-    - dnf -y groupinstall buildsys-build
-    - dnf -y builddep pixman
-    - dnf -y install ninja-build
-    - python3 -m pip install meson>=0.52.1
-    - meson build
-    - ninja -C build test
diff --git a/vendor/pixman/AUTHORS b/vendor/pixman/AUTHORS
deleted file mode 100644
index e69de29bb..000000000
diff --git a/vendor/pixman/CODING_STYLE b/vendor/pixman/CODING_STYLE
deleted file mode 100644
index 9f5171d10..000000000
--- a/vendor/pixman/CODING_STYLE
+++ /dev/null
@@ -1,199 +0,0 @@
-Pixman coding style.
-====================
-
-The pixman coding style is close to cairo's with one exception: braces
-go on their own line, rather than on the line of the if/while/for:
-
-	if (condition)
-	{
-	    do_something();
-	    do_something_else();
-	}
-
-not
-
-	if (condition) {
-	    do_something();
-	    do_something_else();
-        }
-
-
-
-Indentation
-===========
-
-Each new level is indented four spaces:
-
-	if (condition)
-	    do_something();
-
-This may be achieved with space characters or with a combination of
-tab characters and space characters. Tab characters are interpreted as
-
-	Advance to the next column which is a multiple of 8.
-
-
-Names
-=====
-
-In all names, words are separated with underscores. Do not use
-CamelCase for any names.
-
-Macros have ALL_CAPITAL_NAMES
-
-Type names are in lower case and end with "_t". For example
-pixman_image_t.
-
-Labels, functions and variables have lower case names.
-
-
-Braces
-======
-
-Braces always go on their own line:
-
-	if (condition)
-	{
-	    do_this ();
-	    do_that ();
-	}
-	else
-	{
-	    do_the_other ();
-	}
-
-Rules for braces and substatements of if/while/for/do:
-
-* If a substatement spans multiple lines, then there must be braces
-  around it.
-
-* If the condition of an if/while/for spans multiple lines, then 
-  braces must be used for the substatements.
-
-* If one substatement of an if statement has braces, then the other
-  must too.
-
-* Otherwise, don't add braces.
-
-
-Comments
-========
-
-For comments either like this:
-
-        /* One line comment */
-
-or like this:
-
-	/* This is a multi-line comment
-	 *
-         * It extends over multiple lines
-	 */
-
-Generally comments should say things that aren't clear from the code
-itself. If too many comments say obvious things, then people will just
-stop reading all comments, including the good ones.
-
-
-Whitespace
-==========
-
-* Put a single space after commas
-
-* Put spaces around arithmetic operators such a +, -, *, /:
-
-        y * stride + x
-
-        x / unit_x
-
-* Do not put spaces after the address-of operator, the * when used as
-  a pointer derefernce or the ! and ~ operators:
-
-     &foo;
-
-     ~0x00000000
-
-     !condition
-
-     *result = 100
-
-* Break up long lines (> ~80 characters) and use whitespace to align
-  things nicely. This is one way:
-
-  	 some_very_long_function name (
-	 	implementation, op, src, mask, dest, 
-		src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-		width, height);
-
-  This is another:
-
-        some_very_long_function_name (implementation, op,
-                                      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
-
-* Separate logically distinct chunks with a single newline. This
-  obviously applies between functions, but also applies within a
-  function or block or structure definition.
-
-* Use a newline after a block of variable declarations.
-
-* Use a single space before a left parenthesis, except where the
-  standard will not allow it, (eg. when defining a parameterized macro).
-
-* Don't eliminate newlines just because things would still fit on one
-  line. This breaks the expected visual structure of the code making
-  it much harder to read and understand:
-
-	if (condition) foo (); else bar ();	/* Yuck! */
-
-
-Function Definitions
-====================
-
-Function definitions should take the following form:
-
-	void
-	my_function (int argument)
-	{
-	    do_my_things ();
-	}
-
-If all the parameters to a function fit naturally on one line, format
-them that way. Otherwise, put one argument on each line, adding
-whitespace so that the parameter names are aligned with each other.
-
-I.e., do either this:
-
-        void
-        short_arguments (const char *str, int x, int y, int z)
-        {
-        }
-
-or this:
-
-	void
-	long_arguments (const char *char_star_arg,
-			int	    int_arg,
-			double	   *double_star_arg,
-			double	    double_arg)
-	{
-	}
-
-
-Mode lines
-==========
-
-Given the rules above, what is the best way to simplify one's life as
-a code monkey? Get your editor to do most of the tedious work of
-beautifying your code!
-
-As a reward for reading this far, here are some mode lines for the more
-popular editors:
-/*
- * vim:sw=4:sts=4:ts=8:tw=78:fo=tcroq:cindent:cino=\:0,(0
- * vim:isk=a-z,A-Z,48-57,_,.,-,>
- */
-
diff --git a/vendor/pixman/COPYING b/vendor/pixman/COPYING
deleted file mode 100644
index 6168dea56..000000000
--- a/vendor/pixman/COPYING
+++ /dev/null
@@ -1,42 +0,0 @@
-The following is the MIT license, agreed upon by most contributors.
-Copyright holders of new code should use this license statement where
-possible. They may also add themselves to the list below.
-
-/*
- * Copyright 1987, 1988, 1989, 1998  The Open Group
- * Copyright 1987, 1988, 1989 Digital Equipment Corporation
- * Copyright 1999, 2004, 2008 Keith Packard
- * Copyright 2000 SuSE, Inc.
- * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc.
- * Copyright 2004 Nicholas Miell
- * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech
- * Copyright 2005 Trolltech AS
- * Copyright 2007 Luca Barbato
- * Copyright 2008 Aaron Plattner, NVIDIA Corporation
- * Copyright 2008 Rodrigo Kumpera
- * Copyright 2008 André Tupinambá
- * Copyright 2008 Mozilla Corporation
- * Copyright 2008 Frederic Plourde
- * Copyright 2009, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2009, 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
diff --git a/vendor/pixman/ChangeLog b/vendor/pixman/ChangeLog
deleted file mode 100644
index e69de29bb..000000000
diff --git a/vendor/pixman/INSTALL b/vendor/pixman/INSTALL
deleted file mode 100644
index 5458714e1..000000000
--- a/vendor/pixman/INSTALL
+++ /dev/null
@@ -1,234 +0,0 @@
-Installation Instructions
-*************************
-
-Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
-2006 Free Software Foundation, Inc.
-
-This file is free documentation; the Free Software Foundation gives
-unlimited permission to copy, distribute and modify it.
-
-Basic Installation
-==================
-
-Briefly, the shell commands `./configure; make; make install' should
-configure, build, and install this package.  The following
-more-detailed instructions are generic; see the `README' file for
-instructions specific to this package.
-
-   The `configure' shell script attempts to guess correct values for
-various system-dependent variables used during compilation.  It uses
-those values to create a `Makefile' in each directory of the package.
-It may also create one or more `.h' files containing system-dependent
-definitions.  Finally, it creates a shell script `config.status' that
-you can run in the future to recreate the current configuration, and a
-file `config.log' containing compiler output (useful mainly for
-debugging `configure').
-
-   It can also use an optional file (typically called `config.cache'
-and enabled with `--cache-file=config.cache' or simply `-C') that saves
-the results of its tests to speed up reconfiguring.  Caching is
-disabled by default to prevent problems with accidental use of stale
-cache files.
-
-   If you need to do unusual things to compile the package, please try
-to figure out how `configure' could check whether to do them, and mail
-diffs or instructions to the address given in the `README' so they can
-be considered for the next release.  If you are using the cache, and at
-some point `config.cache' contains results you don't want to keep, you
-may remove or edit it.
-
-   The file `configure.ac' (or `configure.in') is used to create
-`configure' by a program called `autoconf'.  You need `configure.ac' if
-you want to change it or regenerate `configure' using a newer version
-of `autoconf'.
-
-The simplest way to compile this package is:
-
-  1. `cd' to the directory containing the package's source code and type
-     `./configure' to configure the package for your system.
-
-     Running `configure' might take a while.  While running, it prints
-     some messages telling which features it is checking for.
-
-  2. Type `make' to compile the package.
-
-  3. Optionally, type `make check' to run any self-tests that come with
-     the package.
-
-  4. Type `make install' to install the programs and any data files and
-     documentation.
-
-  5. You can remove the program binaries and object files from the
-     source code directory by typing `make clean'.  To also remove the
-     files that `configure' created (so you can compile the package for
-     a different kind of computer), type `make distclean'.  There is
-     also a `make maintainer-clean' target, but that is intended mainly
-     for the package's developers.  If you use it, you may have to get
-     all sorts of other programs in order to regenerate files that came
-     with the distribution.
-
-Compilers and Options
-=====================
-
-Some systems require unusual options for compilation or linking that the
-`configure' script does not know about.  Run `./configure --help' for
-details on some of the pertinent environment variables.
-
-   You can give `configure' initial values for configuration parameters
-by setting variables in the command line or in the environment.  Here
-is an example:
-
-     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
-
-   *Note Defining Variables::, for more details.
-
-Compiling For Multiple Architectures
-====================================
-
-You can compile the package for more than one kind of computer at the
-same time, by placing the object files for each architecture in their
-own directory.  To do this, you can use GNU `make'.  `cd' to the
-directory where you want the object files and executables to go and run
-the `configure' script.  `configure' automatically checks for the
-source code in the directory that `configure' is in and in `..'.
-
-   With a non-GNU `make', it is safer to compile the package for one
-architecture at a time in the source code directory.  After you have
-installed the package for one architecture, use `make distclean' before
-reconfiguring for another architecture.
-
-Installation Names
-==================
-
-By default, `make install' installs the package's commands under
-`/usr/local/bin', include files under `/usr/local/include', etc.  You
-can specify an installation prefix other than `/usr/local' by giving
-`configure' the option `--prefix=PREFIX'.
-
-   You can specify separate installation prefixes for
-architecture-specific files and architecture-independent files.  If you
-pass the option `--exec-prefix=PREFIX' to `configure', the package uses
-PREFIX as the prefix for installing programs and libraries.
-Documentation and other data files still use the regular prefix.
-
-   In addition, if you use an unusual directory layout you can give
-options like `--bindir=DIR' to specify different values for particular
-kinds of files.  Run `configure --help' for a list of the directories
-you can set and what kinds of files go in them.
-
-   If the package supports it, you can cause programs to be installed
-with an extra prefix or suffix on their names by giving `configure' the
-option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
-
-Optional Features
-=================
-
-Some packages pay attention to `--enable-FEATURE' options to
-`configure', where FEATURE indicates an optional part of the package.
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE
-is something like `gnu-as' or `x' (for the X Window System).  The
-`README' should mention any `--enable-' and `--with-' options that the
-package recognizes.
-
-   For packages that use the X Window System, `configure' can usually
-find the X include and library files automatically, but if it doesn't,
-you can use the `configure' options `--x-includes=DIR' and
-`--x-libraries=DIR' to specify their locations.
-
-Specifying the System Type
-==========================
-
-There may be some features `configure' cannot figure out automatically,
-but needs to determine by the type of machine the package will run on.
-Usually, assuming the package is built to be run on the _same_
-architectures, `configure' can figure that out, but if it prints a
-message saying it cannot guess the machine type, give it the
-`--build=TYPE' option.  TYPE can either be a short name for the system
-type, such as `sun4', or a canonical name which has the form:
-
-     CPU-COMPANY-SYSTEM
-
-where SYSTEM can have one of these forms:
-
-     OS KERNEL-OS
-
-   See the file `config.sub' for the possible values of each field.  If
-`config.sub' isn't included in this package, then this package doesn't
-need to know the machine type.
-
-   If you are _building_ compiler tools for cross-compiling, you should
-use the option `--target=TYPE' to select the type of system they will
-produce code for.
-
-   If you want to _use_ a cross compiler, that generates code for a
-platform different from the build platform, you should specify the
-"host" platform (i.e., that on which the generated programs will
-eventually be run) with `--host=TYPE'.
-
-Sharing Defaults
-================
-
-If you want to set default values for `configure' scripts to share, you
-can create a site shell script called `config.site' that gives default
-values for variables like `CC', `cache_file', and `prefix'.
-`configure' looks for `PREFIX/share/config.site' if it exists, then
-`PREFIX/etc/config.site' if it exists.  Or, you can set the
-`CONFIG_SITE' environment variable to the location of the site script.
-A warning: not all `configure' scripts look for a site script.
-
-Defining Variables
-==================
-
-Variables not defined in a site shell script can be set in the
-environment passed to `configure'.  However, some packages may run
-configure again during the build, and the customized values of these
-variables may be lost.  In order to avoid this problem, you should set
-them in the `configure' command line, using `VAR=value'.  For example:
-
-     ./configure CC=/usr/local2/bin/gcc
-
-causes the specified `gcc' to be used as the C compiler (unless it is
-overridden in the site shell script).
-
-Unfortunately, this technique does not work for `CONFIG_SHELL' due to
-an Autoconf bug.  Until the bug is fixed you can use this workaround:
-
-     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
-
-`configure' Invocation
-======================
-
-`configure' recognizes the following options to control how it operates.
-
-`--help'
-`-h'
-     Print a summary of the options to `configure', and exit.
-
-`--version'
-`-V'
-     Print the version of Autoconf used to generate the `configure'
-     script, and exit.
-
-`--cache-file=FILE'
-     Enable the cache: use and save the results of the tests in FILE,
-     traditionally `config.cache'.  FILE defaults to `/dev/null' to
-     disable caching.
-
-`--config-cache'
-`-C'
-     Alias for `--cache-file=config.cache'.
-
-`--quiet'
-`--silent'
-`-q'
-     Do not print messages saying which checks are being made.  To
-     suppress all normal output, redirect it to `/dev/null' (any error
-     messages will still be shown).
-
-`--srcdir=DIR'
-     Look for the package's source code in directory DIR.  Usually
-     `configure' can determine that directory automatically.
-
-`configure' also accepts some other, not widely useful, options.  Run
-`configure --help' for more details.
-
diff --git a/vendor/pixman/Makefile.am b/vendor/pixman/Makefile.am
deleted file mode 100644
index 048fc8d8a..000000000
--- a/vendor/pixman/Makefile.am
+++ /dev/null
@@ -1,143 +0,0 @@
-SUBDIRS = pixman demos test
-
-pkgconfigdir=$(libdir)/pkgconfig
-pkgconfig_DATA=pixman-1.pc
-
-$(pkgconfig_DATA): pixman-1.pc.in
-
-snapshot:
-	distdir="$(distdir)-`date '+%Y%m%d'`"; \
-	test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \
-	$(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist
-
-GPGKEY=3892336E
-USERNAME=$$USER
-RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
-RELEASE_CAIRO_HOST =	$(USERNAME)@cairographics.org
-RELEASE_CAIRO_DIR =	/srv/cairo.freedesktop.org/www/$(RELEASE_OR_SNAPSHOT)s
-RELEASE_CAIRO_URL = 	https://cairographics.org/$(RELEASE_OR_SNAPSHOT)s
-RELEASE_XORG_URL =	https://www.x.org/releases/individual/lib
-RELEASE_XORG_HOST =	$(USERNAME)@xorg.freedesktop.org
-RELEASE_XORG_DIR =	/srv/xorg.freedesktop.org/archive/individual/lib
-RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org
-
-EXTRA_DIST =				\
-	Makefile.win32			\
-	Makefile.win32.common		\
-	meson.build			\
-	meson_options.txt		\
-	neon-test.S			\
-	a64-neon-test.S			\
-	arm-simd-test.S			\
-	$(NULL)
-
-tar_gz = $(PACKAGE)-$(VERSION).tar.gz
-tar_xz = $(PACKAGE)-$(VERSION).tar.xz
-
-sha512_tgz = $(tar_gz).sha512
-sha256_tgz = $(tar_gz).sha256
-
-sha512_txz = $(tar_xz).sha512
-sha256_txz = $(tar_xz).sha256
-
-gpg_file = $(sha512_tgz).asc
-
-$(sha512_tgz): $(tar_gz)
-	sha512sum $^ > $@
-
-$(sha256_tgz): $(tar_gz)
-	sha256sum $^ > $@
-
-$(sha512_txz): $(tar_xz)
-	sha512sum $^ > $@
-
-$(sha256_txz): $(tar_xz)
-	sha256sum $^ > $@
-
-$(gpg_file): $(sha512_tgz)
-	@echo "Please enter your GPG password to sign the checksum."
-	gpg --armor --sign $^ 
-
-HASHFILES = $(sha512_tgz) $(sha512_txz) $(sha256_tgz) $(sha256_txz)
-
-release-verify-newer:
-	@echo -n "Checking that no $(VERSION) release already exists at $(RELEASE_XORG_HOST)..."
-	@ssh $(RELEASE_XORG_HOST) test ! -e $(RELEASE_XORG_DIR)/$(tar_gz) \
-		|| (echo "Ouch." && echo "Found: $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)/$(tar_gz)" \
-		&& echo "Refusing to try to generate a new release of the same name." \
-		&& false)
-	@ssh $(RELEASE_CAIRO_HOST) test ! -e $(RELEASE_CAIRO_DIR)/$(tar_gz) \
-		|| (echo "Ouch." && echo "Found: $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)/$(tar_gz)" \
-		&& echo "Refusing to try to generate a new release of the same name." \
-		&& false)
-	@echo "Good."
-
-release-remove-old:
-	$(RM) $(tar_gz) $(tar_xz) $(HASHFILES) $(gpg_file)
-
-ensure-prev:
-	@if [ "$(PREV)" = "" ]; then							\
-		echo ""							          &&	\
-		echo "You must set the PREV variable on the make command line to" &&	\
-		echo "the last version."				  	  &&	\
-		echo ""								  &&	\
-		echo "For example:"						  &&	\
-		echo "      make PREV=0.7.3"				  	  &&	\
-		echo ""								  &&	\
-		false;									\
-	fi
-
-release-check: ensure-prev release-verify-newer release-remove-old distcheck
-
-release-tag:
-	git tag -u $(GPGKEY) -m "$(PACKAGE) $(VERSION) release" $(PACKAGE)-$(VERSION)
-
-release-upload: release-check $(tar_gz) $(tar_xz) $(sha512_tgz) $(sha512_txz) $(sha256_tgz) $(gpg_file)
-	scp $(tar_gz) $(sha512_tgz) $(gpg_file) $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)
-	scp $(tar_gz) $(tar_xz) $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)
-	ssh $(RELEASE_CAIRO_HOST) "rm -f $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-[0-9]* && ln -s $(tar_gz) $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-$(VERSION)"
-
-RELEASE_TYPE = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo "stable release in the" ; else echo "development snapshot leading up to a stable"; fi)
-
-release-publish-message: $(HASHFILES) ensure-prev
-	@echo "Please follow the instructions in RELEASING to push stuff out and"
-	@echo "send out the announcement mails.  Here is the excerpt you need:"
-	@echo ""
-	@echo "Lists:  $(RELEASE_ANNOUNCE_LIST)"
-	@echo "Subject: [ANNOUNCE] $(PACKAGE) release $(VERSION) now available"
-	@echo "============================== CUT HERE =============================="
-	@echo "A new $(PACKAGE) release $(VERSION) is now available. This is a $(RELEASE_TYPE)"
-	@echo ""
-	@echo "tar.gz:"
-	@echo "	$(RELEASE_CAIRO_URL)/$(tar_gz)"
-	@echo "	$(RELEASE_XORG_URL)/$(tar_gz)"
-	@echo ""
-	@echo "tar.xz:"
-	@echo "	$(RELEASE_XORG_URL)/$(tar_xz)"
-	@echo ""
-	@echo "Hashes:"
-	@echo -n "	SHA256: "
-	@cat $(sha256_tgz)
-	@echo -n "	SHA256: "
-	@cat $(sha256_txz)
-	@echo -n "	SHA512: "
-	@cat $(sha512_tgz)
-	@echo -n "	SHA512: "
-	@cat $(sha512_txz)
-	@echo ""
-	@echo "GPG signature:"
-	@echo "	$(RELEASE_CAIRO_URL)/$(gpg_file)"
-	@echo "	(signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)"
-	@echo ""
-	@echo "Git:"
-	@echo "	https://gitlab.freedesktop.org/pixman/pixman.git"
-	@echo "	tag: $(PACKAGE)-$(VERSION)"
-	@echo ""
-	@echo "Log:"
-	@git log --no-merges "$(PACKAGE)-$(PREV)".."$(PACKAGE)-$(VERSION)" | git shortlog | awk '{ printf "\t"; print ; }' | cut -b1-80
-	@echo "============================== CUT HERE =============================="
-	@echo ""
-
-release-publish: release-upload release-tag release-publish-message
-
-.PHONY: release-upload release-publish release-publish-message release-tag
diff --git a/vendor/pixman/Makefile.win32 b/vendor/pixman/Makefile.win32
deleted file mode 100644
index c3ca3bc59..000000000
--- a/vendor/pixman/Makefile.win32
+++ /dev/null
@@ -1,25 +0,0 @@
-default: all
-
-top_srcdir = .
-include $(top_srcdir)/Makefile.win32.common
-
-all: pixman test
-
-pixman:
-	@$(MAKE) -C pixman -f Makefile.win32
-
-test:
-	@$(MAKE) -C test -f Makefile.win32
-
-clean_r:
-	@$(MAKE) -C pixman -f Makefile.win32 clean
-	@$(MAKE) -C test   -f Makefile.win32 clean
-
-check:
-	@$(MAKE) -C test -f Makefile.win32 check
-
-
-clean: clean_r
-
-
-.PHONY: all pixman test clean check
diff --git a/vendor/pixman/Makefile.win32.common b/vendor/pixman/Makefile.win32.common
deleted file mode 100644
index 1b2f89487..000000000
--- a/vendor/pixman/Makefile.win32.common
+++ /dev/null
@@ -1,73 +0,0 @@
-LIBRARY = pixman-1
-
-ifeq ($(shell echo ""),)
-# POSIX style shell
-mkdir_p = mkdir -p $1
-rm = $(RM) $1
-echo = echo "$1"
-else
-# DOS/Windows style shell
-mkdir_p = if not exist $(subst /,\,$1) md $(subst /,\,$1)
-echo = echo $1
-rm = del $(subst /,\,$1)
-endif
-
-CC = cl
-LD = link
-AR = lib
-PERL = perl
-
-ifneq ($(shell echo ""),)
-RM = del
-endif
-
-ifeq ($(top_builddir),)
-top_builddir = $(top_srcdir)
-endif
-
-CFG_VAR = $(CFG)
-ifeq ($(CFG_VAR),)
-CFG_VAR = release
-endif
-
-ifeq ($(CFG_VAR),debug)
-CFG_CFLAGS  = -MDd -Od -Zi
-CFG_LDFLAGS = -DEBUG
-else
-CFG_CFLAGS  = -MD -O2
-CFG_LDFLAGS =
-endif
-
-# Package definitions, to be used instead of those provided in config.h
-PKG_CFLAGS  = -DPACKAGE=$(LIBRARY) -DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
-
-BASE_CFLAGS = -nologo -I. -I$(top_srcdir) -I$(top_srcdir)/pixman
-
-PIXMAN_CFLAGS  = $(BASE_CFLAGS) $(PKG_CFLAGS) $(CFG_CFLAGS) $(CFLAGS)
-PIXMAN_LDFLAGS = -nologo $(CFG_LDFLAGS) $(LDFLAGS)
-PIXMAN_ARFLAGS = -nologo $(LDFLAGS)
-
-
-inform:
-ifneq ($(CFG),release)
-ifneq ($(CFG),debug)
-ifneq ($(CFG),)
-	@echo "Invalid specified configuration option: "$(CFG)"."
-	@echo
-	@echo "Possible choices for configuration are 'release' and 'debug'"
-	@exit 1
-endif
-	@echo "Using default RELEASE configuration... (use CFG=release or CFG=debug)"
-endif
-endif
-
-$(CFG_VAR):
-	@$(call mkdir_p,$@)
-
-$(CFG_VAR)/%.obj: %.c $(libpixman_headers) | $(CFG_VAR)
-	$(CC) -c $(PIXMAN_CFLAGS) -Fo"$@" $<
-
-clean: inform $(CFG_VAR)
-	-$(call rm,$(CFG_VAR)/*.exe $(CFG_VAR)/*.ilk $(CFG_VAR)/*.lib $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb)
-
-.PHONY: inform clean
diff --git a/vendor/pixman/NEWS b/vendor/pixman/NEWS
deleted file mode 100644
index e69de29bb..000000000
diff --git a/vendor/pixman/README b/vendor/pixman/README
deleted file mode 100644
index 961a8529b..000000000
--- a/vendor/pixman/README
+++ /dev/null
@@ -1,140 +0,0 @@
-Pixman
-======
-
-Pixman is a library that provides low-level pixel manipulation
-features such as image compositing and trapezoid rasterization.
-
-Questions should be directed to the pixman mailing list:
-
-    https://lists.freedesktop.org/mailman/listinfo/pixman
-
-You can also file bugs at
-
-    https://gitlab.freedesktop.org/pixman/pixman/-/issues/new
-
-or submit improvements in form of a Merge Request via
-
-    https://gitlab.freedesktop.org/pixman/pixman/-/merge_requests
-
-For real time discussions about pixman, feel free to join the IRC
-channels #cairo and #xorg-devel on the FreeNode IRC network.
-
-
-Contributing
-------------
-
-In order to contribute to pixman, you will need a working knowledge of
-the git version control system. For a quick getting started guide,
-there is the "Everyday Git With 20 Commands Or So guide"
-
-    https://www.kernel.org/pub/software/scm/git/docs/everyday.html
-
-from the Git homepage. For more in depth git documentation, see the
-resources on the Git community documentation page:
-
-    https://git-scm.com/documentation
-
-Pixman uses the infrastructure from the freedesktop.org umbrella
-project. For instructions about how to use the git service on
-freedesktop.org, see:
-
-    https://www.freedesktop.org/wiki/Infrastructure/git/Developers
-
-The Pixman master repository can be found at:
-
-    https://gitlab.freedesktop.org/pixman/pixman
-
-
-Sending patches
----------------
-
-Patches should be submitted in form of Merge Requests via Gitlab.
-
-You will first need to create a fork of the main pixman repository at
-
-    https://gitlab.freedesktop.org/pixman/pixman
-
-via the Fork button on the top right. Once that is done you can add your
-personal repository as a remote to your local pixman development git checkout:
-
-    git remote add my-gitlab git@gitlab.freedesktop.org:YOURUSERNAME/pixman.git
-
-    git fetch my-gitlab
-
-Make sure to have added ssh keys to your gitlab profile at
-
-    https://gitlab.freedesktop.org/profile/keys
-
-Once that is set up, the general workflow for sending patches is to create a
-new local branch with your improvements and once it's ready push it to your
-personal pixman fork:
-
-    git checkout -b fix-some-bug
-    ...
-    git push my-gitlab
-
-The output of the `git push` command will include a link that allows you to
-create a Merge Request against the official pixman repository.
-
-Whenever you make changes to your branch (add new commits or fix up commits)
-you push them back to your personal pixman fork:
-
-    git push -f my-gitlab
-
-If there is an open Merge Request Gitlab will automatically pick up the
-changes from your branch and pixman developers can review them anew.
-
-In order for your patches to be accepted, please consider the
-following guidelines:
-
- - At each point in the series, pixman should compile and the test
-   suite should pass.
-
-   The exception here is if you are changing the test suite to
-   demonstrate a bug. In this case, make one commit that makes the
-   test suite fail due to the bug, and then another commit that fixes
-   the bug.
-
-   You can run the test suite with 
-
-        make check
-
-   if you built pixman with autotools or
-
-       meson test -C builddir
-
-   if you built pixman with meson.
-
-   It will take around two minutes to run on a modern PC.
-
- - Follow the coding style described in the CODING_STYLE file
-
- - For bug fixes, include an update to the test suite to make sure
-   the bug doesn't reappear.
-
- - For new features, add tests of the feature to the test
-   suite. Also, add a program demonstrating the new feature to the
-   demos/ directory.
-
- - Write descriptive commit messages. Useful information to include:
-        - Benchmark results, before and after
-	- Description of the bug that was fixed
-	- Detailed rationale for any new API
-	- Alternative approaches that were rejected (and why they
-          don't work)
-	- If review comments were incorporated, a brief version
-          history describing what those changes were.
-
- - For big patch series, write an introductory post with an overall
-   description of the patch series, including benchmarks and
-   motivation. Each commit message should still be descriptive and
-   include enough information to understand why this particular commit
-   was necessary.
-
-Pixman has high standards for code quality and so almost everybody
-should expect to have the first versions of their patches rejected.
-
-If you think that the reviewers are wrong about something, or that the
-guidelines above are wrong, feel free to discuss the issue. The purpose
-of the guidelines and code review is to ensure high code quality; it is
-not an exercise in compliance.
diff --git a/vendor/pixman/RELEASING b/vendor/pixman/RELEASING
deleted file mode 100644
index e104bda9f..000000000
--- a/vendor/pixman/RELEASING
+++ /dev/null
@@ -1,59 +0,0 @@
-Here are the steps to follow to create a new pixman release:
-
-1) Ensure that there are no uncommitted changes or unpushed commits,
-   and that you are up to date with the latest commits in the central
-   repository. Here are a couple of useful commands:
-
-	git diff			(no output)
-	
-	git status			(should report "nothing to commit")
-
-	git log master...origin		(no output; note: *3* dots)
-
-2) Increment pixman_(major|minor|micro) in configure.ac and meson.build
-   according to the directions in those files.
-
-3) Make sure that new version works, including
-
-	- make distcheck passes
-
-	- the X server still works with the new pixman version
-	  installed
-
-	- the cairo test suite hasn't gained any new failures compared
-	  to last pixman version.
-
-4) Use "git commit" to record the changes made in step 2 and 3.
-
-5) Generate and publish the tar files by running 
-
-	make PREV=<last version> GPGKEY=<your gpg key id> release-publish
-
-   If your freedesktop user name is different from your local one,
-   then also set the variable USER to your freedesktop user name.
-
-6) Run 
-
-	make release-publish-message
-
-   to generate a draft release announcement. Edit it as appropriate and
-   send it to 
-
-	cairo-announce@cairographics.org
-
-	pixman@lists.freedesktop.org
-
-	xorg-announce@lists.freedesktop.org
-
-7) Increment pixman_micro to the next larger (odd) number in
-   configure.ac. Commit this change, and push all commits created
-   during this process using
-
-	git push
-	git push --tags
-
-   You must use "--tags" here; otherwise the new tag will not
-   be pushed out.
-
-8) Change the topic of the #cairo IRC channel on freenode to advertise
-   the new version.
diff --git a/vendor/pixman/a64-neon-test.S b/vendor/pixman/a64-neon-test.S
deleted file mode 100644
index 5d4a4eaa9..000000000
--- a/vendor/pixman/a64-neon-test.S
+++ /dev/null
@@ -1,5 +0,0 @@
-.text
-.arch armv8-a
-.altmacro
-prfm pldl2strm, [x0]
-xtn v0.8b, v0.8h
diff --git a/vendor/pixman/arm-simd-test.S b/vendor/pixman/arm-simd-test.S
deleted file mode 100644
index 910c814d6..000000000
--- a/vendor/pixman/arm-simd-test.S
+++ /dev/null
@@ -1,10 +0,0 @@
-.text
-.arch armv6
-.object_arch armv4
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-uqadd8 r0, r0, r0
diff --git a/vendor/pixman/autogen.sh b/vendor/pixman/autogen.sh
deleted file mode 100755
index fc34bd55c..000000000
--- a/vendor/pixman/autogen.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /bin/sh
-
-srcdir=`dirname $0`
-test -z "$srcdir" && srcdir=.
-
-ORIGDIR=`pwd`
-cd $srcdir
-
-autoreconf -v --install || exit 1
-cd $ORIGDIR || exit $?
-
-if test -z "$NOCONFIGURE"; then
-    $srcdir/configure "$@"
-fi
diff --git a/vendor/pixman/configure.ac b/vendor/pixman/configure.ac
deleted file mode 100644
index b81d89843..000000000
--- a/vendor/pixman/configure.ac
+++ /dev/null
@@ -1,1199 +0,0 @@
-dnl  Copyright 2005 Red Hat, Inc.
-dnl 
-dnl  Permission to use, copy, modify, distribute, and sell this software and its
-dnl  documentation for any purpose is hereby granted without fee, provided that
-dnl  the above copyright notice appear in all copies and that both that
-dnl  copyright notice and this permission notice appear in supporting
-dnl  documentation, and that the name of Red Hat not be used in
-dnl  advertising or publicity pertaining to distribution of the software without
-dnl  specific, written prior permission.  Red Hat makes no
-dnl  representations about the suitability of this software for any purpose.  It
-dnl  is provided "as is" without express or implied warranty.
-dnl 
-dnl  RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
-dnl  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
-dnl  EVENT SHALL RED HAT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
-dnl  CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
-dnl  DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-dnl  TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-dnl  PERFORMANCE OF THIS SOFTWARE.
-dnl
-dnl Process this file with autoconf to create configure.
-
-AC_PREREQ([2.57])
-
-#   Pixman versioning scheme
-#
-#   - The version in git has an odd MICRO version number
-#
-#   - Released versions, both development and stable, have an
-#     even MICRO version number
-#
-#   - Released development versions have an odd MINOR number
-#
-#   - Released stable versions have an even MINOR number
-#
-#   - Versions that break ABI must have a new MAJOR number
-#
-#   - If you break the ABI, then at least this must be done:
-#
-#        - increment MAJOR
-#
-#        - In the first development release where you break ABI, find
-#          all instances of "pixman-n" and change them to pixman-(n+1)
-#
-#          This needs to be done at least in 
-#                    configure.ac
-#                    all Makefile.am's
-#                    pixman-n.pc.in
-#
-#      This ensures that binary incompatible versions can be installed
-#      in parallel.  See http://www106.pair.com/rhp/parallel.html for
-#      more information
-#
-
-m4_define([pixman_major], 0)
-m4_define([pixman_minor], 42)
-m4_define([pixman_micro], 3)
-
-m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
-
-AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman)
-AM_INIT_AUTOMAKE([foreign dist-xz])
-
-# Suppress verbose compile lines
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-
-AC_CONFIG_HEADERS(pixman-config.h)
-
-AC_CANONICAL_HOST
-
-test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
-
-AC_PROG_CC
-AM_PROG_AS
-AC_PROG_LIBTOOL
-AC_CHECK_FUNCS([getisax])
-AC_C_BIGENDIAN
-AC_C_INLINE
-
-dnl PIXMAN_LINK_WITH_ENV(env-setup, program, true-action, false-action)
-dnl
-dnl Compiles and links the given program in the environment setup by env-setup
-dnl and executes true-action on success and false-action on failure.
-AC_DEFUN([PIXMAN_LINK_WITH_ENV],[dnl
-	save_CFLAGS="$CFLAGS"
-	save_LDFLAGS="$LDFLAGS"
-	save_LIBS="$LIBS"
-	CFLAGS=""
-	LDFLAGS=""
-	LIBS=""
-	$1
-	CFLAGS="$save_CFLAGS $CFLAGS"
-	LDFLAGS="$save_LDFLAGS $LDFLAGS"
-	LIBS="$save_LIBS $LIBS"
-	AC_LINK_IFELSE(
-		[AC_LANG_SOURCE([$2])],
-		[pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
-		 pixman_cc_flag=yes],
-		[pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
-		 pixman_cc_flag=no])
-
-	if test "x$pixman_cc_stderr" != "x"; then
-		pixman_cc_flag=no
-	fi
-
-	if test "x$pixman_cc_flag" = "xyes"; then
-		ifelse([$3], , :, [$3])
-	else
-		ifelse([$4], , :, [$4])
-	fi
-	CFLAGS="$save_CFLAGS"
-	LDFLAGS="$save_LDFLAGS"
-	LIBS="$save_LIBS"
-])
-
-dnl Find a -Werror for catching warnings.
-WERROR=
-for w in -Werror -errwarn; do
-    if test "z$WERROR" = "z"; then
-        AC_MSG_CHECKING([whether the compiler supports $w])
-        PIXMAN_LINK_WITH_ENV(
-		[CFLAGS=$w],
-		[int main(int c, char **v) { (void)c; (void)v; return 0; }],
-		[WERROR=$w; yesno=yes], [yesno=no])
-	AC_MSG_RESULT($yesno)
-    fi
-done
-
-dnl PIXMAN_CHECK_CFLAG(flag, [program])
-dnl  Adds flag to CFLAGS if the given program links without warnings or errors.
-AC_DEFUN([PIXMAN_CHECK_CFLAG], [dnl
-	AC_MSG_CHECKING([whether the compiler supports $1])
-	PIXMAN_LINK_WITH_ENV(
-		[CFLAGS="$WERROR $1"],
-		[$2
-		 int main(int c, char **v) { (void)c; (void)v; return 0; }
-		],
-		[_yesno=yes],
-		[_yesno=no])
-	if test "x$_yesno" = xyes; then
-	   CFLAGS="$CFLAGS $1"
-	fi
-	AC_MSG_RESULT($_yesno)
-])
-
-AC_CHECK_SIZEOF(long)
-
-# Checks for Sun Studio compilers
-AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
-AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
-
-# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC
-# if we're using Sun Studio and neither the user nor a config.site
-# has set CFLAGS.
-if test $SUNCC = yes &&			\
-   test "x$test_CFLAGS" = "x" &&	\
-   test "$CFLAGS" = "-g"
-then
-  CFLAGS="-O -g"
-fi
-
-# 
-# We ignore pixman_major in the version here because the major version should
-# always be encoded in the actual library name. Ie., the soname is:
-#
-#      pixman-$(pixman_major).0.minor.micro
-#
-m4_define([lt_current], [pixman_minor])
-m4_define([lt_revision], [pixman_micro])
-m4_define([lt_age], [pixman_minor])
-
-LT_VERSION_INFO="lt_current:lt_revision:lt_age"
-
-PIXMAN_VERSION_MAJOR=pixman_major()
-AC_SUBST(PIXMAN_VERSION_MAJOR)
-PIXMAN_VERSION_MINOR=pixman_minor()
-AC_SUBST(PIXMAN_VERSION_MINOR)
-PIXMAN_VERSION_MICRO=pixman_micro()
-AC_SUBST(PIXMAN_VERSION_MICRO)
-
-AC_SUBST(LT_VERSION_INFO)
-
-# Check for dependencies
-
-PIXMAN_CHECK_CFLAG([-Wall])
-PIXMAN_CHECK_CFLAG([-Wdeclaration-after-statement])
-PIXMAN_CHECK_CFLAG([-Wno-unused-local-typedefs])
-PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
-
-dnl =========================================================================
-dnl OpenMP for the test suite?
-dnl
-
-# Check for OpenMP support only when autoconf support that (require autoconf >=2.62)
-OPENMP_CFLAGS=
-m4_ifdef([AC_OPENMP], [AC_OPENMP])
-
-if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then
-  AC_MSG_WARN([OpenMP support requested but found unsupported])
-fi
-
-dnl May not fail to link without -Wall -Werror added
-dnl So try to link only when openmp is supported
-dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used
-if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then
-  m4_define([openmp_test_program],[dnl
-  #include <stdio.h>
-
-  extern unsigned int lcg_seed;
-  #pragma omp threadprivate(lcg_seed)
-  unsigned int lcg_seed;
-
-  unsigned function(unsigned a, unsigned b)
-  {
-	lcg_seed ^= b;
-	return ((a + b) ^ a ) + lcg_seed;
-  }
-
-  int main(int argc, char **argv)
-  {
-	int i;
-	int n1 = 0, n2 = argc;
-	unsigned checksum = 0;
-	int verbose = argv != NULL;
-	unsigned (*test_function)(unsigned, unsigned);
-	test_function = function;
-	#pragma omp parallel for reduction(+:checksum) default(none) \
-					shared(n1, n2, test_function, verbose)
-	for (i = n1; i < n2; i++)
-	{
-		unsigned crc = test_function (i, 0);
-		if (verbose)
-			printf ("%d: %08X\n", i, crc);
-		checksum += crc;
-	}
-	printf("%u\n", checksum);
-	return 0;
-  }
-  ])
-
-  PIXMAN_LINK_WITH_ENV(
-	[CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"],
-	[openmp_test_program],
-	[have_openmp=yes],
-	[have_openmp=no])
-  if test "x$have_openmp" = "xyes" ; then
-    AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
-  fi
-fi
-AC_SUBST(OPENMP_CFLAGS)
-
-dnl =========================================================================
-dnl -fvisibility stuff
-
-PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl
-#if defined(__GNUC__) && (__GNUC__ >= 4)
-#ifdef _WIN32
-#error Have -fvisibility but it is ignored and generates a warning
-#endif
-#else
-#error Need GCC 4.0 for visibility
-#endif
-])
-
-PIXMAN_CHECK_CFLAG([-xldscope=hidden], [dnl
-#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
-#else
-#error Need Sun Studio 8 for visibility
-#endif
-])
-
-dnl ===========================================================================
-dnl Check for Loongson Multimedia Instructions
-
-if test "x$LS_CFLAGS" = "x" ; then
-    LS_CFLAGS="-mloongson-mmi"
-fi
-
-have_loongson_mmi=no
-AC_MSG_CHECKING(whether to use Loongson MMI assembler)
-
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir"
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifndef __mips_loongson_vector_rev
-#error "Loongson Multimedia Instructions are only available on Loongson"
-#endif
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-#error "Need GCC >= 4.4 for Loongson MMI compilation"
-#endif
-#include "pixman/loongson-mmintrin.h"
-int main () {
-    union {
-        __m64 v;
-        char c[8];
-    } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
-    int b = 4;
-    __m64 c = _mm_srli_pi16 (a.v, b);
-    return 0;
-}]])], have_loongson_mmi=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(loongson-mmi,
-   [AC_HELP_STRING([--disable-loongson-mmi],
-                   [disable Loongson MMI fast paths])],
-   [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto])
-
-if test $enable_loongson_mmi = no ; then
-   have_loongson_mmi=disabled
-fi
-
-if test $have_loongson_mmi = yes ; then
-   AC_DEFINE(USE_LOONGSON_MMI, 1, [use Loongson Multimedia Instructions])
-else
-   LS_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_loongson_mmi)
-if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then
-   AC_MSG_ERROR([Loongson MMI not detected])
-fi
-
-AM_CONDITIONAL(USE_LOONGSON_MMI, test $have_loongson_mmi = yes)
-
-dnl ===========================================================================
-dnl Check for MMX
-
-if test "x$MMX_CFLAGS" = "x" ; then
-   if test "x$SUNCC" = "xyes"; then
-      # Sun Studio doesn't have an -xarch=mmx flag, so we have to use sse
-      # but if we're building 64-bit, mmx & sse support is on by default and
-      # -xarch=sse throws an error instead
-      if test "$AMD64_ABI" = "no" ; then
-         MMX_CFLAGS="-xarch=sse"
-      fi
-   else
-      MMX_CFLAGS="-mmmx -Winline"
-   fi
-fi
-
-have_mmx_intrinsics=no
-AC_MSG_CHECKING(whether to use MMX intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$MMX_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
-#error "Need GCC >= 3.4 for MMX intrinsics"
-#endif
-#include <mmintrin.h>
-#include <stdint.h>
-
-/* Check support for block expressions */
-#define _mm_shuffle_pi16(A, N)						\
-    ({									\
-	__m64 ret;							\
-									\
-	/* Some versions of clang will choke on K */ 			\
-	asm ("pshufw %2, %1, %0\n\t"					\
-	     : "=y" (ret)						\
-	     : "y" (A), "K" ((const int8_t)N)				\
-	);								\
-									\
-	ret;								\
-    })
-
-int main () {
-    __m64 v = _mm_cvtsi32_si64 (1);
-    __m64 w;
-
-    w = _mm_shuffle_pi16(v, 5);
-
-    /* Some versions of clang will choke on this */
-    asm ("pmulhuw %1, %0\n\t"
-	: "+y" (w)
-	: "y" (v)
-    );
-
-    return _mm_cvtsi64_si32 (v);
-}]])], have_mmx_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(mmx,
-   [AC_HELP_STRING([--disable-mmx],
-                   [disable x86 MMX fast paths])],
-   [enable_mmx=$enableval], [enable_mmx=auto])
-
-if test $enable_mmx = no ; then
-   have_mmx_intrinsics=disabled
-fi
-
-if test $have_mmx_intrinsics = yes ; then
-   AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics])
-else
-   MMX_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_mmx_intrinsics)
-if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then
-   AC_MSG_ERROR([x86 MMX intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes)
-
-dnl ===========================================================================
-dnl Check for SSE2
-
-if test "x$SSE2_CFLAGS" = "x" ; then
-   if test "x$SUNCC" = "xyes"; then
-      # SSE2 is enabled by default in the Sun Studio 64-bit environment
-      if test "$AMD64_ABI" = "no" ; then
-         SSE2_CFLAGS="-xarch=sse2"
-      fi
-   else
-      SSE2_CFLAGS="-msse2 -Winline"
-   fi
-fi
-
-have_sse2_intrinsics=no
-AC_MSG_CHECKING(whether to use SSE2 intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$SSE2_CFLAGS $CFLAGS"
-
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
-#   if !defined(__amd64__) && !defined(__x86_64__)
-#      error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
-#   endif
-#endif
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-int param;
-int main () {
-    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
-	c = _mm_xor_si128 (a, b);
-    return _mm_cvtsi128_si32(c);
-}]])], have_sse2_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(sse2,
-   [AC_HELP_STRING([--disable-sse2],
-                   [disable SSE2 fast paths])],
-   [enable_sse2=$enableval], [enable_sse2=auto])
-
-if test $enable_sse2 = no ; then
-   have_sse2_intrinsics=disabled
-fi
-
-if test $have_sse2_intrinsics = yes ; then
-   AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics])
-fi
-
-AC_MSG_RESULT($have_sse2_intrinsics)
-if test $enable_sse2 = yes && test $have_sse2_intrinsics = no ; then
-   AC_MSG_ERROR([SSE2 intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
-
-dnl ===========================================================================
-dnl Check for SSSE3
-
-if test "x$SSSE3_CFLAGS" = "x" ; then
-    SSSE3_CFLAGS="-mssse3 -Winline"
-fi
-
-have_ssse3_intrinsics=no
-AC_MSG_CHECKING(whether to use SSSE3 intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$SSSE3_CFLAGS $CFLAGS"
-
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <tmmintrin.h>
-int param;
-int main () {
-    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
-    c = _mm_maddubs_epi16 (a, b);
-    return _mm_cvtsi128_si32(c);
-}]])], have_ssse3_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(ssse3,
-   [AC_HELP_STRING([--disable-ssse3],
-                   [disable SSSE3 fast paths])],
-   [enable_ssse3=$enableval], [enable_ssse3=auto])
-
-if test $enable_ssse3 = no ; then
-   have_ssse3_intrinsics=disabled
-fi
-
-if test $have_ssse3_intrinsics = yes ; then
-   AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics])
-fi
-
-AC_MSG_RESULT($have_ssse3_intrinsics)
-if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then
-   AC_MSG_ERROR([SSSE3 intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
-
-dnl ===========================================================================
-dnl Other special flags needed when building code using x86 ISA extensions
-case $host_os in
-   solaris*)
-      # When building Solaris binaries, apply a mapfile to ensure that the
-      # binaries aren't flagged as only able to run on MMX/SSE/SSSE3 capable
-      # CPUs since they check at runtime before using those instructions.
-      # Not all linkers grok the mapfile format so we check for that first.
-      if test "$host_cpu" = "i386" -o "$host_cpu" = "x86_64"; then
-	 use_hwcap_mapfile=no
-	 AC_MSG_CHECKING(whether to use a hardware capability map file)
-	 hwcap_save_LDFLAGS="$LDFLAGS"
-	 HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
-	 LDFLAGS="$LDFLAGS -Wl,-M,${srcdir}/pixman/solaris-hwcap.mapfile"
-	 AC_LINK_IFELSE([AC_LANG_SOURCE([[int main() { return 0; }]])],
-			use_hwcap_mapfile=yes,
-			HWCAP_LDFLAGS="")
-	 LDFLAGS="$hwcap_save_LDFLAGS"
-	 AC_MSG_RESULT($use_hwcap_mapfile)
-      fi
-      if test "x$MMX_LDFLAGS" = "x" ; then
-         MMX_LDFLAGS="$HWCAP_LDFLAGS"
-      fi
-      if test "x$SSE2_LDFLAGS" = "x" ; then
-	 SSE2_LDFLAGS="$HWCAP_LDFLAGS"
-      fi
-      if test "x$SSSE3_LDFLAGS" = "x" ; then
-	 SSSE3_LDFLAGS="$HWCAP_LDFLAGS"
-      fi
-      ;;
-esac
-
-AC_SUBST(LS_CFLAGS)
-AC_SUBST(IWMMXT_CFLAGS)
-AC_SUBST(MMX_CFLAGS)
-AC_SUBST(MMX_LDFLAGS)
-AC_SUBST(SSE2_CFLAGS)
-AC_SUBST(SSE2_LDFLAGS)
-AC_SUBST(SSSE3_CFLAGS)
-AC_SUBST(SSSE3_LDFLAGS)
-
-dnl ===========================================================================
-dnl Check for VMX/Altivec
-if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
-    VMX_CFLAGS="-faltivec"
-else
-    VMX_CFLAGS="-maltivec -mabi=altivec"
-fi
-
-have_vmx_intrinsics=no
-AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$VMX_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
-#error "Need GCC >= 3.4 for sane altivec support"
-#endif
-#include <altivec.h>
-int main () {
-    vector unsigned int v = vec_splat_u32 (1);
-    v = vec_sub (v, v);
-    return 0;
-}]])], have_vmx_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(vmx,
-   [AC_HELP_STRING([--disable-vmx],
-                   [disable VMX fast paths])],
-   [enable_vmx=$enableval], [enable_vmx=auto])
-
-if test $enable_vmx = no ; then
-   have_vmx_intrinsics=disabled
-fi
-
-if test $have_vmx_intrinsics = yes ; then
-   AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
-else
-   VMX_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_vmx_intrinsics)
-if test $enable_vmx = yes && test $have_vmx_intrinsics = no ; then
-   AC_MSG_ERROR([VMX intrinsics not detected])
-fi
-
-AC_SUBST(VMX_CFLAGS)
-
-AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports ARM SIMD instructions
-have_arm_simd=no
-AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-.text
-.arch armv6
-.object_arch armv4
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-uqadd8 r0, r0, r0]])], have_arm_simd=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(arm-simd,
-   [AC_HELP_STRING([--disable-arm-simd],
-                   [disable ARM SIMD fast paths])],
-   [enable_arm_simd=$enableval], [enable_arm_simd=auto])
-
-if test $enable_arm_simd = no ; then
-   have_arm_simd=disabled
-fi
-
-if test $have_arm_simd = yes ; then
-   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
-AC_MSG_RESULT($have_arm_simd)
-if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
-   AC_MSG_ERROR([ARM SIMD intrinsics not detected])
-fi
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports NEON instructions
-have_arm_neon=no
-AC_MSG_CHECKING(whether to use ARM NEON assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-.text
-.fpu neon
-.arch armv7a
-.object_arch armv4
-.eabi_attribute 10, 0
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-vmovn.u16 d0, q0]])], have_arm_neon=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(arm-neon,
-   [AC_HELP_STRING([--disable-arm-neon],
-                   [disable ARM NEON fast paths])],
-   [enable_arm_neon=$enableval], [enable_arm_neon=auto])
-
-if test $enable_arm_neon = no ; then
-   have_arm_neon=disabled
-fi
-
-if test $have_arm_neon = yes ; then
-   AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
-
-AC_MSG_RESULT($have_arm_neon)
-if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
-   AC_MSG_ERROR([ARM NEON intrinsics not detected])
-fi
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports ARM-a64 NEON instructions
-have_arm_a64_neon=no
-AC_MSG_CHECKING(whether to use ARM A64 NEON assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-.text
-.arch armv8-a
-.altmacro
-prfm pldl2strm, [x0]
-xtn v0.8b, v0.8h]])], have_arm_a64_neon=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(arm-a64-neon,
-   [AC_HELP_STRING([--disable-arm-a64-neon],
-                   [disable ARM A64 NEON fast paths])],
-   [enable_arm_a64_neon=$enableval], [enable_arm_a64_neon=auto])
-
-if test $enable_arm_a64_neon = no ; then
-   have_arm_a64_neon=disabled
-fi
-
-if test $have_arm_a64_neon = yes ; then
-   AC_DEFINE(USE_ARM_A64_NEON, 1, [use ARM A64_NEON assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_ARM_A64_NEON, test $have_arm_a64_neon = yes)
-
-AC_MSG_RESULT($have_arm_a64_neon)
-if test $enable_arm_a64_neon = yes && test $have_arm_a64_neon4 = no ; then
-   AC_MSG_ERROR([ARM A64 NEON intrinsics not detected])
-fi
-
-dnl ===========================================================================
-dnl Check for IWMMXT
-
-AC_ARG_ENABLE(arm-iwmmxt,
-   [AC_HELP_STRING([--disable-arm-iwmmxt],
-                   [disable ARM IWMMXT fast paths])],
-   [enable_iwmmxt=$enableval], [enable_iwmmxt=auto])
-
-AC_ARG_ENABLE(arm-iwmmxt2,
-   [AC_HELP_STRING([--disable-arm-iwmmxt2],
-                   [build ARM IWMMXT fast paths with -march=iwmmxt instead of -march=iwmmxt2])],
-   [enable_iwmmxt2=$enableval], [enable_iwmmxt2=auto])
-
-if test "x$IWMMXT_CFLAGS" = "x" ; then
-   IWMMXT_CFLAGS="-flax-vector-conversions -Winline -march=iwmmxt"
-   if test $enable_iwmmxt2 != no ; then
-      IWMMXT_CFLAGS="${IWMMXT_CFLAGS}2"
-   fi
-fi
-
-have_iwmmxt_intrinsics=no
-AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $IWMMXT_CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#ifndef __arm__
-#error "IWMMXT is only available on ARM"
-#endif
-#ifndef __IWMMXT__
-#error "IWMMXT not enabled (with -march=iwmmxt)"
-#endif
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
-#error "Need GCC >= 4.8 for IWMMXT intrinsics"
-#endif
-#include <mmintrin.h>
-int main () {
-	union {
-		__m64 v;
-		char c[8];
-	} a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
-	int b = 4;
-	__m64 c = _mm_srli_si64 (a.v, b);
-}]])], have_iwmmxt_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-if test $enable_iwmmxt = no ; then
-   have_iwmmxt_intrinsics=disabled
-fi
-
-if test $have_iwmmxt_intrinsics = yes ; then
-   AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics])
-else
-   IWMMXT_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_iwmmxt_intrinsics)
-if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then
-   AC_MSG_ERROR([IWMMXT intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes)
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions
-
-have_mips_dspr2=no
-AC_MSG_CHECKING(whether to use MIPS DSPr2 assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-mdspr2 $CFLAGS"
-
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#if !(defined(__mips__) &&  __mips_isa_rev >= 2)
-#error MIPS DSPr2 is currently only available on MIPS32r2 platforms.
-#endif
-int
-main ()
-{
-    int c = 0, a = 0, b = 0;
-    __asm__ __volatile__ (
-        "precr.qb.ph %[c], %[a], %[b]          \n\t"
-        : [c] "=r" (c)
-        : [a] "r" (a), [b] "r" (b)
-    );
-    return c;
-}]])], have_mips_dspr2=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(mips-dspr2,
-   [AC_HELP_STRING([--disable-mips-dspr2],
-                   [disable MIPS DSPr2 fast paths])],
-   [enable_mips_dspr2=$enableval], [enable_mips_dspr2=auto])
-
-if test $enable_mips_dspr2 = no ; then
-   have_mips_dspr2=disabled
-fi
-
-if test $have_mips_dspr2 = yes ; then
-   AC_DEFINE(USE_MIPS_DSPR2, 1, [use MIPS DSPr2 assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_MIPS_DSPR2, test $have_mips_dspr2 = yes)
-
-AC_MSG_RESULT($have_mips_dspr2)
-if test $enable_mips_dspr2 = yes && test $have_mips_dspr2 = no ; then
-   AC_MSG_ERROR([MIPS DSPr2 instructions not detected])
-fi
-
-dnl =========================================================================================
-dnl Check for GNU-style inline assembly support
-
-have_gcc_inline_asm=no
-AC_MSG_CHECKING(whether to use GNU-style inline assembler)
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-int main () {
-    /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */
-	asm volatile ( "\tnop\n" : : : "cc", "memory" );
-    return 0;
-}]])], have_gcc_inline_asm=yes)
-
-AC_ARG_ENABLE(gcc-inline-asm,
-   [AC_HELP_STRING([--disable-gcc-inline-asm],
-                   [disable GNU-style inline assembler])],
-   [enable_gcc_inline_asm=$enableval], [enable_gcc_inline_asm=auto])
-
-if test $enable_gcc_inline_asm = no ; then
-   have_gcc_inline_asm=disabled
-fi
-
-if test $have_gcc_inline_asm = yes ; then
-   AC_DEFINE(USE_GCC_INLINE_ASM, 1, [use GNU-style inline assembler])
-fi
-
-AC_MSG_RESULT($have_gcc_inline_asm)
-if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then
-   AC_MSG_ERROR([GNU-style inline assembler not detected])
-fi
-
-AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
-
-dnl ==============================================
-dnl Static test programs
-
-AC_ARG_ENABLE(static-testprogs,
-   [AC_HELP_STRING([--enable-static-testprogs],
-		   [build test programs as static binaries [default=no]])],
-   [enable_static_testprogs=$enableval], [enable_static_testprogs=no])
-
-TESTPROGS_EXTRA_LDFLAGS=
-if test "x$enable_static_testprogs" = "xyes" ; then
-   TESTPROGS_EXTRA_LDFLAGS="-all-static"
-fi
-AC_SUBST(TESTPROGS_EXTRA_LDFLAGS)
-
-dnl ==============================================
-dnl Timers
-
-AC_ARG_ENABLE(timers,
-   [AC_HELP_STRING([--enable-timers],
-		   [enable TIMER_BEGIN and TIMER_END macros [default=no]])],
-   [enable_timers=$enableval], [enable_timers=no])
-
-if test $enable_timers = yes ; then 
-   AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros])
-fi
-AC_SUBST(PIXMAN_TIMERS)
-
-dnl ===================================
-dnl gnuplot
-
-AC_ARG_ENABLE(gnuplot,
-   [AC_HELP_STRING([--enable-gnuplot],
-                   [enable output of filters that can be piped to gnuplot [default=no]])],
-   [enable_gnuplot=$enableval], [enable_gnuplot=no])
-
-if test $enable_gnuplot = yes ; then
-   AC_DEFINE(PIXMAN_GNUPLOT, 1, [enable output that can be piped to gnuplot])
-fi
-AC_SUBST(PIXMAN_GNUPLOT)
-
-dnl ===================================
-dnl GTK+
-
-AC_ARG_ENABLE(gtk,
-   [AC_HELP_STRING([--enable-gtk],
-                   [enable tests using GTK+ [default=auto]])],
-   [enable_gtk=$enableval], [enable_gtk=auto])
-
-PKG_PROG_PKG_CONFIG
-
-if test $enable_gtk = yes ; then
-   AC_CHECK_LIB([pixman-1], [pixman_version_string])
-   PKG_CHECK_MODULES(GTK, [gtk+-3.0 pixman-1])
-fi
-
-if test $enable_gtk = auto ; then
-   AC_CHECK_LIB([pixman-1], [pixman_version_string], [enable_gtk=auto], [enable_gtk=no])
-fi
-
-if test $enable_gtk = auto ; then
-   PKG_CHECK_MODULES(GTK, [gtk+-3.0 pixman-1], [enable_gtk=yes], [enable_gtk=no])
-fi
-
-AM_CONDITIONAL(HAVE_GTK, [test "x$enable_gtk" = xyes])
-
-AC_SUBST(GTK_CFLAGS)
-AC_SUBST(GTK_LIBS)
-
-dnl =====================================
-dnl posix_memalign, sigaction, alarm, gettimeofday
-
-AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no)
-if test x$have_posix_memalign = xyes; then
-   AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
-fi
-
-AC_CHECK_FUNC(sigaction, have_sigaction=yes, have_sigaction=no)
-if test x$have_sigaction = xyes; then
-   AC_DEFINE(HAVE_SIGACTION, 1, [Whether we have sigaction()])
-fi
-
-AC_CHECK_FUNC(alarm, have_alarm=yes, have_alarm=no)
-if test x$have_alarm = xyes; then
-   AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()])
-fi
-
-AC_CHECK_HEADER([sys/mman.h],
-   [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have <sys/mman.h>])])
-
-AC_CHECK_FUNC(mmap, have_mmap=yes, have_mmap=no)
-if test x$have_mmap = xyes; then
-   AC_DEFINE(HAVE_MMAP, 1, [Whether we have mmap()])
-fi
-
-AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no)
-if test x$have_mprotect = xyes; then
-   AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()])
-fi
-
-AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no)
-if test x$have_getpagesize = xyes; then
-   AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()])
-fi
-
-AC_CHECK_HEADER([fenv.h],
-   [AC_DEFINE(HAVE_FENV_H, [1], [Define to 1 if we have <fenv.h>])])
-
-AC_CHECK_LIB(m, feenableexcept, have_feenableexcept=yes, have_feenableexcept=no)
-if test x$have_feenableexcept = xyes; then
-   AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()])
-fi
-
-AC_CHECK_DECL([FE_DIVBYZERO],
-	[AC_DEFINE(HAVE_FEDIVBYZERO, 1, [Whether we have FE_DIVBYZERO])],
-	[],
-	[[#include <fenv.h>]])
-
-AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
-AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
-if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
-   AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()])
-fi
-
-dnl =====================================
-dnl Check for missing sqrtf() as, e.g., for Solaris 9
-
-AC_SEARCH_LIBS([sqrtf], [m], [],
-               [AC_DEFINE([sqrtf], [sqrt],
-                          [Define to sqrt if you do not have the `sqrtf' function.])])
-
-dnl =====================================
-dnl Thread local storage
-
-AC_MSG_CHECKING(for thread local storage (TLS) support)
-AC_CACHE_VAL(ac_cv_tls, [
-    ac_cv_tls=none
-    keywords="__thread __declspec(thread)"
-    for kw in $keywords ; do
-        AC_TRY_COMPILE([
-#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
-#error This MinGW version has broken __thread support
-#endif
-#ifdef __OpenBSD__
-#error OpenBSD has broken __thread support
-#endif
-
-int $kw test;], [], [ac_cv_tls=$kw; break])
-    done
-])
-AC_MSG_RESULT($ac_cv_tls)
-
-if test "$ac_cv_tls" != "none"; then
-    AC_DEFINE_UNQUOTED([TLS], $ac_cv_tls, [The compiler supported TLS storage class])
-fi
-
-dnl
-dnl posix tls
-dnl
-
-m4_define([pthread_test_program],AC_LANG_SOURCE([[dnl
-#include <stdlib.h>
-#include <pthread.h>
-
-static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-static pthread_key_t key;
-
-static void
-make_key (void)
-{
-    pthread_key_create (&key, NULL);
-}
-
-int
-main ()
-{
-    void *value = NULL;
-
-    if (pthread_once (&once_control, make_key) != 0)
-    {
-	value = NULL;
-    }
-    else
-    {
-	value = pthread_getspecific (key);
-	if (!value)
-	{
-	    value = malloc (100);
-	    pthread_setspecific (key, value);
-	}
-    }
-    return 0;
-}
-]]))
-
-AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl
-    if test "z$support_for_pthreads" != "zyes"; then
-	PIXMAN_LINK_WITH_ENV(
-		[$1], [pthread_test_program],
-		[PTHREAD_CFLAGS="$CFLAGS"
-		 PTHREAD_LIBS="$LIBS"
-		 PTHREAD_LDFLAGS="$LDFLAGS"
-		 support_for_pthreads=yes])
-    fi
-])
-
-support_for_pthreads=no
-
-AC_MSG_CHECKING(for pthreads)
-
-PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
-PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
-PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
-    
-if test $support_for_pthreads = yes; then
-    AC_DEFINE([HAVE_PTHREADS], [], [Whether pthreads is supported])
-    if test $ac_cv_tls = none ; then
-        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
-    fi
-fi
-
-AC_MSG_RESULT($support_for_pthreads)
-
-AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
-AC_SUBST(HAVE_PTHREADS)
-AC_SUBST(PTHREAD_LDFLAGS)
-AC_SUBST(PTHREAD_LIBS)
-AC_SUBST(PTHREAD_CFLAGS)
-
-dnl =====================================
-dnl __attribute__((constructor))
-
-support_for_attribute_constructor=no
-
-AC_MSG_CHECKING(for __attribute__((constructor)))
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7))
-/* attribute 'constructor' is supported since gcc 2.7, but some compilers
- * may only pretend to be gcc, so let's try to actually use it
- */
-static int x = 1;
-static void __attribute__((constructor)) constructor_function () { x = 0; }
-int main (void) { return x; }
-#else
-#error not gcc or gcc version is older than 2.7
-#endif
-]])], support_for_attribute_constructor=yes)
-
-if test x$support_for_attribute_constructor = xyes; then
-   AC_DEFINE([TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR],
-             [],[Whether the tool chain supports __attribute__((constructor))])
-fi
-
-AC_MSG_RESULT($support_for_attribute_constructor)
-AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
-
-dnl =====================================
-dnl __float128
-
-support_for_float128=no
-
-AC_MSG_CHECKING(for __float128)
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }
-]])], support_for_float128=yes)
-
-if test x$support_for_float128 = xyes; then
-   AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128])
-fi
-
-AC_MSG_RESULT($support_for_float128)
-
-dnl =====================================
-dnl __builtin_clz
-
-support_for_builtin_clz=no
-
-AC_MSG_CHECKING(for __builtin_clz)
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-unsigned int x = 11; int main (void) { return __builtin_clz(x); }
-]])], support_for_builtin_clz=yes)
-
-if test x$support_for_builtin_clz = xyes; then
-   AC_DEFINE([HAVE_BUILTIN_CLZ], [], [Whether the compiler supports __builtin_clz])
-fi
-
-AC_MSG_RESULT($support_for_builtin_clz)
-
-dnl =====================================
-dnl GCC vector extensions
-
-support_for_gcc_vector_extensions=no
-
-AC_MSG_CHECKING(for GCC vector extensions)
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-unsigned int __attribute__ ((vector_size(16))) e, a, b;
-int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; }
-]])], support_for_gcc_vector_extensions=yes)
-
-if test x$support_for_gcc_vector_extensions = xyes; then
-   AC_DEFINE([HAVE_GCC_VECTOR_EXTENSIONS], [],
-             [Whether the compiler supports GCC vector extensions])
-fi
-
-AC_MSG_RESULT($support_for_gcc_vector_extensions)
-
-dnl ==================
-dnl libpng
-
-AC_ARG_ENABLE(libpng, AS_HELP_STRING([--enable-libpng], [Build support for libpng (default: auto)]),
-                      [have_libpng=$enableval], [have_libpng=auto])
-
-case x$have_libpng in
-	xyes) PKG_CHECK_MODULES(PNG, [libpng]) ;;
-	xno) ;;
-	*) PKG_CHECK_MODULES(PNG, [libpng], have_libpng=yes, have_libpng=no) ;;
-esac
-
-if test x$have_libpng = xyes; then
-    AC_DEFINE([HAVE_LIBPNG], [1], [Whether we have libpng])
-fi
-
-AC_SUBST(HAVE_LIBPNG)
-
-AC_OUTPUT([pixman-1.pc
-           pixman-1-uninstalled.pc
-           Makefile
-	   pixman/Makefile
-	   pixman/pixman-version.h
-	   demos/Makefile
-	   test/Makefile])
-
-m4_if(m4_eval(pixman_minor % 2), [1], [
-   echo
-   echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
-   echo
-   echo "      Thanks for testing this development snapshot of pixman. Please"
-   echo "      report any problems you find, either by sending email to "
-   echo
-   echo "          pixman@lists.freedesktop.org"
-   echo
-   echo "      or by filing a bug at "
-   echo
-   echo "          https://gitlab.freedesktop.org/pixman/pixman/-/issues/new "
-   echo
-   echo "      If you are looking for a stable release of pixman, please note "
-   echo "      that stable releases have _even_ minor version numbers. Ie., "
-   echo "      pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a "
-   echo "      development snapshot that may contain bugs and experimental "
-   echo "      features. "
-   echo 
-   echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
-   echo
-])
diff --git a/vendor/pixman/meson.build b/vendor/pixman/meson.build
deleted file mode 100644
index 42dbe93d5..000000000
--- a/vendor/pixman/meson.build
+++ /dev/null
@@ -1,581 +0,0 @@
-# Copyright © 2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-project(
-  'pixman',
-  ['c'],
-  version : '0.42.3',
-  license : 'MIT',
-  meson_version : '>= 0.52.0',
-  default_options : ['c_std=gnu99', 'buildtype=debugoptimized'],
-)
-
-config = configuration_data()
-cc = meson.get_compiler('c')
-null_dep = dependency('', required : false)
-
-add_project_arguments(
-  cc.get_supported_arguments([
-    '-Wdeclaration-after-statement',
-    '-fno-strict-aliasing',
-    '-fvisibility=hidden',
-    '-Wundef',
-    # -ftrapping-math is the default for gcc, but -fno-trapping-math is the
-    # default for clang.  The FLOAT_IS_ZERO macro is used to guard against
-    # floating-point exceptions, however with -fno-trapping-math, the compiler
-    # can reorder floating-point operations so that they occur before the guard.
-    # Note, this function is ignored in clang < 10.0.0.
-    '-ftrapping-math'
-  ]),
-  language : ['c']
-)
-
-# GCC and Clang both ignore -Wno options that they don't recognize, so test for
-# -W<opt>, then add -Wno-<opt> if it's ignored
-foreach opt : ['unused-local-typedefs']
-  if cc.has_argument('-W' + opt)
-    add_project_arguments(['-Wno-' + opt], language : ['c'])
-  endif
-endforeach
-
-use_loongson_mmi = get_option('loongson-mmi')
-have_loongson_mmi = false
-loongson_mmi_flags = ['-mloongson-mmi']
-if not use_loongson_mmi.disabled()
-  if host_machine.cpu_family() == 'mips64' and cc.compiles('''
-      #ifndef __mips_loongson_vector_rev
-      #error "Loongson Multimedia Instructions are only available on Loongson"
-      #endif
-      #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-      #error "Need GCC >= 4.4 for Loongson MMI compilation"
-      #endif
-      #include "pixman/loongson-mmintrin.h"
-      int main () {
-        union {
-          __m64 v;
-          char c[8];
-        } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
-        int b = 4;
-        __m64 c = _mm_srli_pi16 (a.v, b);
-        return 0;
-      }''',
-      args : loongson_mmi_flags,
-      include_directories : include_directories('.'),
-      name : 'Loongson MMI Intrinsic Support')
-    have_loongson_mmi = true
-  endif
-endif
-
-if have_loongson_mmi
-  config.set10('USE_LOONGSON_MMI', true)
-elif use_loongson_mmi.enabled()
-  error('Loongson MMI Support unavailable, but required')
-endif
-
-use_mmx = get_option('mmx')
-have_mmx = false
-mmx_flags = []
-
-if cc.get_id() == 'msvc'
-  mmx_flags = ['/w14710', '/w14714', '/wd4244']
-elif cc.get_id() == 'sun'
-  mmx_flags = ['-xarch=sse']
-else
-  mmx_flags = ['-mmmx', '-Winline']
-endif
-if not use_mmx.disabled()
-  if host_machine.cpu_family() == 'x86_64' or cc.get_id() == 'msvc'
-    have_mmx = true
-  elif host_machine.cpu_family() == 'x86' and cc.compiles('''
-      #include <mmintrin.h>
-      #include <stdint.h>
-
-      /* Check support for block expressions */
-      #define _mm_shuffle_pi16(A, N)                    \
-        ({                                              \
-        __m64 ret;                                      \
-                                                        \
-        /* Some versions of clang will choke on K */    \
-        asm ("pshufw %2, %1, %0\n\t"                    \
-             : "=y" (ret)                               \
-             : "y" (A), "K" ((const int8_t)N)           \
-        );                                              \
-                                                        \
-        ret;                                            \
-        })
-
-      int main () {
-          __m64 v = _mm_cvtsi32_si64 (1);
-          __m64 w;
-
-          w = _mm_shuffle_pi16(v, 5);
-
-          /* Some versions of clang will choke on this */
-          asm ("pmulhuw %1, %0\n\t"
-               : "+y" (w)
-               : "y" (v)
-          );
-
-          return _mm_cvtsi64_si32 (v);
-      }''',
-      args : mmx_flags,
-      name : 'MMX Intrinsic Support')
-    have_mmx = true
-  endif
-endif
-
-if have_mmx
-  # Inline assembly do not work on X64 MSVC, so we use
-  # compatibility intrinsics there
-  if cc.get_id() != 'msvc' or host_machine.cpu_family() != 'x86_64'
-    config.set10('USE_X86_MMX', true)
-  endif
-elif use_mmx.enabled()
-  error('MMX Support unavailable, but required')
-endif
-
-use_sse2 = get_option('sse2')
-have_sse2 = false
-sse2_flags = []
-if cc.get_id() == 'sun'
-  sse2_flags = ['-xarch=sse2']
-elif cc.get_id() != 'msvc'
-  sse2_flags = ['-msse2', '-Winline']
-endif
-if not use_sse2.disabled()
-  if host_machine.cpu_family() == 'x86'
-    if cc.compiles('''
-        #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
-        #   if !defined(__amd64__) && !defined(__x86_64__)
-        #      error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
-        #   endif
-        #endif
-        #include <mmintrin.h>
-        #include <xmmintrin.h>
-        #include <emmintrin.h>
-        int param;
-        int main () {
-          __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
-          c = _mm_xor_si128 (a, b);
-          return _mm_cvtsi128_si32(c);
-        }''',
-        args : sse2_flags,
-        name : 'SSE2 Intrinsic Support')
-      have_sse2 = true
-    endif
-  elif host_machine.cpu_family() == 'x86_64'
-    have_sse2 = true
-  endif
-endif
-
-if have_sse2
-  config.set10('USE_SSE2', true)
-elif use_sse2.enabled()
-  error('sse2 Support unavailable, but required')
-endif
-
-use_ssse3 = get_option('ssse3')
-have_ssse3 = false
-ssse3_flags = []
-if cc.get_id() != 'msvc'
-  ssse3_flags = ['-mssse3', '-Winline']
-endif
-
-# x64 pre-2010 MSVC compilers crashes when building the ssse3 code
-if not use_ssse3.disabled() and not (cc.get_id() == 'msvc' and cc.version().version_compare('<16') and host_machine.cpu_family() == 'x86_64')
-  if host_machine.cpu_family().startswith('x86')
-    if cc.compiles('''
-        #include <mmintrin.h>
-        #include <xmmintrin.h>
-        #include <emmintrin.h>
-        int param;
-        int main () {
-          __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
-          c = _mm_xor_si128 (a, b);
-          return _mm_cvtsi128_si32(c);
-        }''',
-        args : ssse3_flags,
-        name : 'SSSE3 Intrinsic Support')
-      have_ssse3 = true
-    endif
-  endif
-endif
-
-if have_ssse3
-  config.set10('USE_SSSE3', true)
-elif use_ssse3.enabled()
-  error('ssse3 Support unavailable, but required')
-endif
-
-use_vmx = get_option('vmx')
-have_vmx = false
-vmx_flags = ['-maltivec', '-mabi=altivec']
-if not use_vmx.disabled()
-  if host_machine.cpu_family().startswith('ppc')
-    if cc.compiles('''
-        #include <altivec.h>
-        int main () {
-            vector unsigned int v = vec_splat_u32 (1);
-            v = vec_sub (v, v);
-            return 0;
-        }''',
-        args : vmx_flags,
-        name : 'VMX/Altivec Intrinsic Support')
-      have_vmx = true
-    endif
-  endif
-endif
-
-if have_vmx
-  config.set10('USE_VMX', true)
-elif use_vmx.enabled()
-  error('vmx Support unavailable, but required')
-endif
-
-use_armv6_simd = get_option('arm-simd')
-have_armv6_simd = false
-if not use_armv6_simd.disabled()
-  if host_machine.cpu_family() == 'arm'
-    if cc.compiles(files('arm-simd-test.S'), name : 'ARMv6 SIMD Intrinsic Support')
-      have_armv6_simd = true
-    endif
-  endif
-endif
-
-if have_armv6_simd
-  config.set10('USE_ARM_SIMD', true)
-elif use_armv6_simd.enabled()
-  error('ARMv6 SIMD Support unavailable, but required')
-endif
-
-use_neon = get_option('neon')
-have_neon = false
-if not use_neon.disabled()
-  if host_machine.cpu_family() == 'arm'
-    if cc.compiles(files('neon-test.S'), name : 'NEON Intrinsic Support')
-      have_neon = true
-    endif
-  endif
-endif
-
-if have_neon
-  config.set10('USE_ARM_NEON', true)
-elif use_neon.enabled()
-  error('NEON Support unavailable, but required')
-endif
-
-use_a64neon = get_option('a64-neon')
-have_a64neon = false
-if not use_a64neon.disabled()
-  if host_machine.cpu_family() == 'aarch64'
-    if cc.compiles(files('a64-neon-test.S'), name : 'NEON A64 Intrinsic Support')
-      have_a64neon = true
-    endif
-  endif
-endif
-
-if have_a64neon
-  config.set10('USE_ARM_A64_NEON', true)
-elif use_a64neon.enabled()
-  error('A64 NEON Support unavailable, but required')
-endif
-
-use_iwmmxt = get_option('iwmmxt')
-have_iwmmxt = false
-iwmmxt_flags = ['-flax-vector-conversions', '-Winline']
-if not use_iwmmxt.disabled()
-  if get_option('iwmmxt2')
-    iwmmxt_flags += '-march=iwmmxt2'
-  else
-    iwmmxt_flags += '-march=iwmmxt'
-  endif
-
-  if host_machine.cpu_family() == 'arm'
-    if cc.compiles('''
-        #ifndef __IWMMXT__
-        #error "IWMMXT not enabled (with -march=iwmmxt)"
-        #endif
-        #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
-        #error "Need GCC >= 4.8 for IWMMXT intrinsics"
-        #endif
-        #include <mmintrin.h>
-        int main () {
-          union {
-            __m64 v;
-            char c[8];
-          } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
-          int b = 4;
-          __m64 c = _mm_srli_si64 (a.v, b);
-        }
-        ''',
-        args : iwmmxt_flags,
-        name : 'IWMMXT Intrinsic Support')
-      have_iwmmxt = true
-    endif
-  endif
-endif
-
-if have_iwmmxt
-  config.set10('USE_ARM_IWMMXT', true)
-elif use_iwmmxt.enabled()
-  error('IWMMXT Support unavailable, but required')
-endif
-
-use_mips_dspr2 = get_option('mips-dspr2')
-have_mips_dspr2 = false
-mips_dspr2_flags = ['-mdspr2']
-if not use_mips_dspr2.disabled()
-  if host_machine.cpu_family() == 'mips32'
-    if cc.compiles('''
-        #if !(defined(__mips__) &&  __mips_isa_rev >= 2)
-        #error MIPS DSPr2 is currently only available on MIPS32r2 platforms.
-        #endif
-        int
-        main ()
-        {
-            int c = 0, a = 0, b = 0;
-            __asm__ __volatile__ (
-                "precr.qb.ph %[c], %[a], %[b]          \n\t"
-                : [c] "=r" (c)
-                : [a] "r" (a), [b] "r" (b)
-            );
-            return c;
-        }''',
-        args : mipds_dspr2_flags,
-        name : 'DSPr2 Intrinsic Support')
-      have_mips_dspr2 = true
-    endif
-  endif
-endif
-
-if have_mips_dspr2
-  config.set10('USE_MIPS_DSPR2', true)
-elif use_mips_dspr2.enabled()
-  error('MIPS DSPr2 Support unavailable, but required')
-endif
-
-use_gnu_asm = get_option('gnu-inline-asm')
-if not use_gnu_asm.disabled()
-  if cc.compiles('''
-      int main () {
-        /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */
-        asm volatile ( "\tnop\n" : : : "cc", "memory" );
-        return 0;
-      }
-      ''',
-      name : 'GNU Inline ASM support.')
-    config.set10('USE_GCC_INLINE_ASM', true)
-  elif use_gnu_asm.enabled()
-    error('GNU inline assembly support missing but required.')
-  endif
-endif
-
-if get_option('timers')
-  config.set('PIXMAN_TIMERS', 1)
-endif
-if get_option('gnuplot')
-  config.set('PIXMAN_GNUPLOT', 1)
-endif
-
-if cc.get_id() != 'msvc'
-  dep_openmp = dependency('openmp', required : get_option('openmp'))
-  if dep_openmp.found()
-    config.set10('USE_OPENMP', true)
-  elif meson.version().version_compare('<0.51.0')
-  # In versions of meson before 0.51 the openmp dependency can still
-  # inject arguments in the the auto case when it is not found, the
-  # detection does work correctly in that case however, so we just
-  # replace dep_openmp with null_dep to work around this.
-    dep_openmp = null_dep
-  endif
-else
-  # the MSVC implementation of openmp is not compliant enough for our
-  # uses here, so we disable it here.
-  # Please see: https://stackoverflow.com/questions/12560243/using-threadprivate-directive-in-visual-studio
-  dep_openmp = null_dep
-endif
-
-dep_gtk = dependency('gtk+-3.0', required : get_option('gtk'), required: get_option('demos'))
-dep_glib = dependency('glib-2.0', required : get_option('gtk'), required: get_option('demos'))
-
-dep_png = null_dep
-if not get_option('libpng').disabled()
-  dep_png = dependency('libpng', required : false)
-
-  # We need to look for the right library to link to for libpng,
-  # when looking for libpng manually
-  foreach png_ver : [ '16', '15', '14', '13', '12', '10' ]
-    if not dep_png.found()
-      dep_png = cc.find_library('libpng@0@'.format(png_ver), has_headers : ['png.h'], required : false)
-    endif
-  endforeach
-
-  if get_option('libpng').enabled() and not dep_png.found()
-    error('libpng support requested but libpng library not found')
-  endif
-endif
-
-if dep_png.found()
-  config.set('HAVE_LIBPNG', 1)
-endif
-dep_m = cc.find_library('m', required : false)
-dep_threads = dependency('threads')
-
-# MSVC-style compilers do not come with pthreads, so we must link
-# to it explicitly, currently pthreads-win32 is supported
-pthreads_found = false
-
-if dep_threads.found() and cc.has_header('pthread.h')
-  if cc.get_argument_syntax() == 'msvc'
-    pthread_lib = null_dep
-    foreach pthread_type : ['VC3', 'VSE3', 'VCE3', 'VC2', 'VSE2', 'VCE2']
-      if not pthread_lib.found()
-        pthread_lib = cc.find_library('pthread@0@'.format(pthread_type), required : false)
-      endif
-    endforeach
-    if pthread_lib.found()
-      pthreads_found = true
-      dep_threads = pthread_lib
-    endif
-  else
-    pthreads_found = true
-  endif
-endif
-
-if pthreads_found
-  config.set('HAVE_PTHREADS', 1)
-endif
-
-funcs = ['sigaction', 'alarm', 'mprotect', 'getpagesize', 'mmap', 'getisax', 'gettimeofday']
-# mingw claimes to have posix_memalign, but it doesn't
-if host_machine.system() != 'windows'
-  funcs += 'posix_memalign'
-endif
-
-foreach f : funcs
-  if cc.has_function(f)
-    config.set('HAVE_@0@'.format(f.to_upper()), 1)
-  endif
-endforeach
-
-# This is only used in one test, that defines _GNU_SOURCE
-if cc.has_function('feenableexcept',
-                   prefix : '#define _GNU_SOURCE\n#include <fenv.h>',
-                   dependencies : dep_m)
-  config.set('HAVE_FEENABLEEXCEPT', 1)
-endif
-
-if cc.has_header_symbol('fenv.h', 'FE_DIVBYZERO')
-  config.set('HAVE_FEDIVBYZERO', 1)
-endif
-
-foreach h : ['sys/mman.h', 'fenv.h', 'unistd.h']
-  if cc.check_header(h)
-    config.set('HAVE_@0@'.format(h.underscorify().to_upper()), 1)
-  endif
-endforeach
-
-use_tls = get_option('tls')
-have_tls = ''
-if not use_tls.disabled()
-  # gcc on Windows only warns that __declspec(thread) isn't supported,
-  # passing -Werror=attributes makes it fail.
-  if (host_machine.system() == 'windows' and
-      cc.compiles('int __declspec(thread) foo;',
-                  args : cc.get_supported_arguments(['-Werror=attributes']),
-                  name : 'TLS via __declspec(thread)'))
-    have_tls = '__declspec(thread)'
-  elif cc.compiles('int __thread foo;', name : 'TLS via __thread')
-    have_tls = '__thread'
-  endif
-endif
-
-if have_tls != ''
-  config.set('TLS', have_tls)
-elif use_tls.enabled()
-  error('Compiler TLS Support unavailable, but required')
-endif
-
-if cc.links('''
-    static int x = 1;
-    static void __attribute__((constructor)) constructor_function () { x = 0; }
-    int main (void) { return x; }
-    ''',
-    name : '__attribute__((constructor))')
-  config.set('TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR', 1)
-endif
-
-if cc.links(
-    ' __float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }',
-    name : 'Has float128 support')
-  config.set('HAVE_FLOAT128', 1)
-endif
-
-if cc.has_function('clz')
-  config.set('HAVE_BUILTIN_CLZ', 1)
-endif
-
-if cc.links('''
-    unsigned int __attribute__ ((vector_size(16))) e, a, b;
-    int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; }
-    ''',
-    name : 'Support for GCC vector extensions')
-  config.set('HAVE_GCC_VECTOR_EXTENSIONS', 1)
-endif
-
-if host_machine.endian() == 'big'
-  config.set('WORDS_BIGENDIAN', 1)
-endif
-
-config.set('SIZEOF_LONG', cc.sizeof('long'))
-
-# Required to make pixman-private.h
-config.set('PACKAGE', 'foo')
-
-version_conf = configuration_data()
-split = meson.project_version().split('.')
-version_conf.set('PIXMAN_VERSION_MAJOR', split[0])
-version_conf.set('PIXMAN_VERSION_MINOR', split[1])
-version_conf.set('PIXMAN_VERSION_MICRO', split[2])
-
-add_project_arguments('-DHAVE_CONFIG_H', language : ['c'])
-
-subdir('pixman')
-
-if not get_option('tests').disabled() or not get_option('demos').disabled()
-  subdir(join_paths('test', 'utils'))
-endif
-
-if not get_option('demos').disabled()
-  subdir('demos')
-endif
-
-if not get_option('tests').disabled()
-  subdir('test')
-endif
-
-pkg = import('pkgconfig')
-pkg.generate(libpixman,
-  name : 'Pixman',
-  filebase : 'pixman-1',
-  description : 'The pixman library (version 1)',
-  subdirs: 'pixman-1',
-  version : meson.project_version(),
-)
diff --git a/vendor/pixman/meson_options.txt b/vendor/pixman/meson_options.txt
deleted file mode 100644
index df10889c0..000000000
--- a/vendor/pixman/meson_options.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-# Copyright © 2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-option(
-  'loongson-mmi',
-  type : 'feature',
-  description : 'Use Loongson MMI intrinsic optimized paths',
-)
-option(
-  'mmx',
-  type : 'feature',
-  description : 'Use X86 MMX intrinsic optimized paths',
-)
-option(
-  'sse2',
-  type : 'feature',
-  description : 'Use X86 SSE2 intrinsic optimized paths',
-)
-option(
-  'ssse3',
-  type : 'feature',
-  description : 'Use X86 SSSE3 intrinsic optimized paths',
-)
-option(
-  'vmx',
-  type : 'feature',
-  description : 'Use PPC VMX/Altivec intrinsic optimized paths',
-)
-option(
-  'arm-simd',
-  type : 'feature',
-  description : 'Use ARMv6 SIMD intrinsic optimized paths',
-)
-option(
-  'neon',
-  type : 'feature',
-  description : 'Use ARM NEON intrinsic optimized paths',
-)
-option(
-  'a64-neon',
-  type : 'feature',
-  description : 'Use ARM A64 NEON intrinsic optimized paths',
-)
-option(
-  'iwmmxt',
-  type : 'feature',
-  description : 'Use ARM IWMMXT intrinsic optimized paths',
-)
-option(
-  'iwmmxt2',
-  type : 'boolean',
-  value : true,
-  description : 'Use ARM IWMMXT2 intrinsic instead of IWMMXT',
-)
-option(
-  'mips-dspr2',
-  type : 'feature',
-  description : 'Use MIPS32 DSPr2 intrinsic optimized paths',
-)
-option(
-  'gnu-inline-asm',
-  type : 'feature',
-  description : 'Use GNU style inline assembler',
-)
-option(
-  'tls',
-  type : 'feature',
-  description : 'Use compiler support for thread-local storage',
-)
-option(
-  'cpu-features-path',
-  type : 'string',
-  description : 'Path to platform-specific cpu-features.[ch] for systems that do not provide it (e.g. Android)',
-)
-option(
-  'openmp',
-  type : 'feature',
-  description : 'Enable OpenMP for tests',
-)
-option(
-  'timers',
-  type : 'boolean',
-  value : false,
-  description : 'Enable TIMER_* macros',
-)
-option(
-  'gnuplot',
-  type : 'boolean',
-  value : false,
-  description : 'Enable output of filters that can be piped to gnuplot',
-)
-option(
-  'gtk',
-  type : 'feature',
-  description : 'Enable demos using GTK',
-)
-option(
-  'libpng',
-  type : 'feature',
-  description : 'Use libpng in tests'
-)
-option(
-  'tests',
-  type : 'feature',
-  description : 'Build tests'
-)
-option(
-  'demos',
-  type : 'feature',
-  description : 'Build demos'
-)
diff --git a/vendor/pixman/neon-test.S b/vendor/pixman/neon-test.S
deleted file mode 100644
index c30a3990b..000000000
--- a/vendor/pixman/neon-test.S
+++ /dev/null
@@ -1,12 +0,0 @@
-.text
-.fpu neon
-.arch armv7a
-.object_arch armv4
-.eabi_attribute 10, 0
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-vmovn.u16 d0, q0
diff --git a/vendor/pixman/pixman-1-uninstalled.pc.in b/vendor/pixman/pixman-1-uninstalled.pc.in
deleted file mode 100644
index e0347d010..000000000
--- a/vendor/pixman/pixman-1-uninstalled.pc.in
+++ /dev/null
@@ -1,5 +0,0 @@
-Name: Pixman
-Description: The pixman library (version 1)
-Version: @PACKAGE_VERSION@
-Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman
-Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la
diff --git a/vendor/pixman/pixman-1.pc.in b/vendor/pixman/pixman-1.pc.in
deleted file mode 100644
index e3b9711ae..000000000
--- a/vendor/pixman/pixman-1.pc.in
+++ /dev/null
@@ -1,11 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: Pixman
-Description: The pixman library (version 1)
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/pixman-1
-Libs: -L${libdir} -lpixman-1
-
diff --git a/vendor/pixman/pixman/Makefile.am b/vendor/pixman/pixman/Makefile.am
deleted file mode 100644
index f05e2adc5..000000000
--- a/vendor/pixman/pixman/Makefile.am
+++ /dev/null
@@ -1,158 +0,0 @@
-include $(top_srcdir)/pixman/Makefile.sources
-
-lib_LTLIBRARIES = libpixman-1.la
-
-libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@ 
-libpixman_1_la_LIBADD = @PTHREAD_LIBS@ -lm
-libpixman_1_la_SOURCES = $(libpixman_sources) $(libpixman_headers)
-
-libpixmanincludedir = $(includedir)/pixman-1
-libpixmaninclude_HEADERS = pixman.h pixman-version.h
-noinst_LTLIBRARIES = 
-
-EXTRA_DIST =				\
-	Makefile.win32			\
-	dither/make-blue-noise.c	\
-	pixman-region.c			\
-	solaris-hwcap.mapfile		\
-	meson.build			\
-	$(NULL)
-
-# mmx code
-if USE_X86_MMX
-noinst_LTLIBRARIES += libpixman-mmx.la
-libpixman_mmx_la_SOURCES = \
-	pixman-mmx.c
-libpixman_mmx_la_CFLAGS = $(MMX_CFLAGS)
-libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-mmx.la
-
-ASM_CFLAGS_mmx=$(MMX_CFLAGS)
-endif
-
-# vmx code
-if USE_VMX
-noinst_LTLIBRARIES += libpixman-vmx.la
-libpixman_vmx_la_SOURCES = \
-	pixman-vmx.c \
-	pixman-combine32.h
-libpixman_vmx_la_CFLAGS = $(VMX_CFLAGS)
-libpixman_1_la_LIBADD += libpixman-vmx.la
-
-ASM_CFLAGS_vmx=$(VMX_CFLAGS)
-endif
-
-# sse2 code
-if USE_SSE2
-noinst_LTLIBRARIES += libpixman-sse2.la
-libpixman_sse2_la_SOURCES = \
-	pixman-sse2.c
-libpixman_sse2_la_CFLAGS = $(SSE2_CFLAGS)
-libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-sse2.la
-
-ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
-endif
-
-# ssse3 code
-if USE_SSSE3
-noinst_LTLIBRARIES += libpixman-ssse3.la
-libpixman_ssse3_la_SOURCES = \
-	pixman-ssse3.c
-libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS)
-libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-ssse3.la
-
-ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
-endif
-
-# arm simd code
-if USE_ARM_SIMD
-noinst_LTLIBRARIES += libpixman-arm-simd.la
-libpixman_arm_simd_la_SOURCES = \
-	pixman-arm-simd.c	\
-	pixman-arm-common.h	\
-	pixman-arm-simd-asm.S   \
-	pixman-arm-simd-asm-scaled.S \
-	pixman-arm-asm.h	\
-	pixman-arm-simd-asm.h
-libpixman_1_la_LIBADD += libpixman-arm-simd.la
-
-ASM_CFLAGS_arm_simd=
-endif
-
-# arm neon code
-if USE_ARM_NEON
-noinst_LTLIBRARIES += libpixman-arm-neon.la
-libpixman_arm_neon_la_SOURCES = \
-        pixman-arm-neon.c	\
-        pixman-arm-common.h	\
-        pixman-arm-neon-asm.S	\
-		pixman-arm-neon-asm-bilinear.S \
-        pixman-arm-asm.h	\
-        pixman-arm-neon-asm.h
-libpixman_1_la_LIBADD += libpixman-arm-neon.la
-
-ASM_CFLAGS_arm_neon=
-endif
-
-# arm a64 neon code
-if USE_ARM_A64_NEON
-noinst_LTLIBRARIES += libpixman-arma64-neon.la
-libpixman_arma64_neon_la_SOURCES = \
-        pixman-arm-neon.c        \
-        pixman-arm-common.h      \
-        pixman-arma64-neon-asm.S \
-        pixman-arma64-neon-asm-bilinear.S \
-        pixman-arm-asm.h         \
-        pixman-arma64-neon-asm.h
-libpixman_1_la_LIBADD += libpixman-arma64-neon.la
-
-ASM_CFLAGS_arm_neon=
-endif
-
-# iwmmxt code
-if USE_ARM_IWMMXT
-libpixman_iwmmxt_la_SOURCES = pixman-mmx.c
-noinst_LTLIBRARIES += libpixman-iwmmxt.la
-libpixman_1_la_LIBADD += libpixman-iwmmxt.la
-
-libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c
-	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c
-	$(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo
-
-libpixman_iwmmxt_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
-libpixman_iwmmxt_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
-        $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
-	$(CFLAGS) $(IWMMXT_CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-
-libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEPENDENCIES) 
-	$(AM_V_CCLD)$(libpixman_iwmmxt_la_LINK) libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_LIBADD) $(LIBS)
-endif
-
-# mips dspr2 code
-if USE_MIPS_DSPR2
-noinst_LTLIBRARIES += libpixman-mips-dspr2.la
-libpixman_mips_dspr2_la_SOURCES = \
-        pixman-mips-dspr2.c \
-        pixman-mips-dspr2.h \
-        pixman-mips-dspr2-asm.S \
-        pixman-mips-dspr2-asm.h \
-        pixman-mips-memcpy-asm.S
-libpixman_1_la_LIBADD += libpixman-mips-dspr2.la
-
-ASM_CFLAGS_mips_dspr2=
-endif
-
-# loongson code
-if USE_LOONGSON_MMI
-noinst_LTLIBRARIES += libpixman-loongson-mmi.la
-libpixman_loongson_mmi_la_SOURCES = pixman-mmx.c loongson-mmintrin.h
-libpixman_loongson_mmi_la_CFLAGS = $(LS_CFLAGS)
-libpixman_1_la_LDFLAGS += $(LS_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-loongson-mmi.la
-endif
-
-.c.s : $(libpixmaninclude_HEADERS)
-	$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/vendor/pixman/pixman/Makefile.sources b/vendor/pixman/pixman/Makefile.sources
deleted file mode 100644
index 23d1d974d..000000000
--- a/vendor/pixman/pixman/Makefile.sources
+++ /dev/null
@@ -1,43 +0,0 @@
-libpixman_sources =			\
-	pixman.c			\
-	pixman-access.c			\
-	pixman-access-accessors.c	\
-	pixman-bits-image.c		\
-	pixman-combine32.c		\
-	pixman-combine-float.c		\
-	pixman-conical-gradient.c	\
-	pixman-filter.c			\
-	pixman-x86.c			\
-	pixman-mips.c			\
-	pixman-arm.c			\
-	pixman-ppc.c			\
-	pixman-edge.c			\
-	pixman-edge-accessors.c		\
-	pixman-fast-path.c		\
-	pixman-glyph.c			\
-	pixman-general.c		\
-	pixman-gradient-walker.c	\
-	pixman-image.c			\
-	pixman-implementation.c		\
-	pixman-linear-gradient.c	\
-	pixman-matrix.c			\
-	pixman-noop.c			\
-	pixman-radial-gradient.c	\
-	pixman-region16.c		\
-	pixman-region32.c		\
-	pixman-solid-fill.c		\
-	pixman-timer.c			\
-	pixman-trap.c			\
-	pixman-utils.c			\
-	$(NULL)
-
-libpixman_headers =			\
-	dither/blue-noise-64x64.h	\
-	pixman.h			\
-	pixman-accessor.h		\
-	pixman-combine32.h		\
-	pixman-compiler.h		\
-	pixman-edge-imp.h		\
-	pixman-inlines.h		\
-	pixman-private.h		\
-	$(NULL)
diff --git a/vendor/pixman/pixman/Makefile.win32 b/vendor/pixman/pixman/Makefile.win32
deleted file mode 100644
index 7b64033bc..000000000
--- a/vendor/pixman/pixman/Makefile.win32
+++ /dev/null
@@ -1,93 +0,0 @@
-default: all
-
-top_srcdir = ..
-include $(top_srcdir)/pixman/Makefile.sources
-include $(top_srcdir)/Makefile.win32.common
-
-MMX_VAR = $(MMX)
-ifeq ($(MMX_VAR),)
-MMX_VAR=on
-endif
-
-SSE2_VAR = $(SSE2)
-ifeq ($(SSE2_VAR),)
-SSE2_VAR=on
-endif
-
-SSSE3_VAR = $(SSSE3)
-ifeq ($(SSSE3_VAR),)
-SSSE3_VAR=on
-endif
-
-MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714
-SSE2_CFLAGS = -DUSE_SSE2
-SSSE3_CFLAGS = -DUSE_SSSE3
-
-# MMX compilation flags
-ifeq ($(MMX_VAR),on)
-PIXMAN_CFLAGS += $(MMX_CFLAGS)
-libpixman_sources += pixman-mmx.c
-endif
-
-# SSE2 compilation flags
-ifeq ($(SSE2_VAR),on)
-PIXMAN_CFLAGS += $(SSE2_CFLAGS)
-libpixman_sources += pixman-sse2.c
-endif
-
-# SSSE3 compilation flags
-ifeq ($(SSSE3_VAR),on)
-PIXMAN_CFLAGS += $(SSSE3_CFLAGS)
-libpixman_sources += pixman-ssse3.c
-endif
-
-OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(libpixman_sources))
-
-# targets
-all: inform informMMX informSSE2 informSSSE3 $(CFG_VAR)/$(LIBRARY).lib
-
-informMMX:
-ifneq ($(MMX),off)
-ifneq ($(MMX),on)
-ifneq ($(MMX),)
-	@echo "Invalid specified MMX option : "$(MMX_VAR)"."
-	@echo
-	@echo "Possible choices for MMX are 'on' or 'off'"
-	@exit 1
-endif
-	@echo "Setting MMX flag to default value 'on'... (use MMX=on or MMX=off)"
-endif
-endif
-
-informSSE2:
-ifneq ($(SSE2),off)
-ifneq ($(SSE2),on)
-ifneq ($(SSE2),)
-	@echo "Invalid specified SSE option : "$(SSE2)"."
-	@echo
-	@echo "Possible choices for SSE2 are 'on' or 'off'"
-	@exit 1
-endif
-	@echo "Setting SSE2 flag to default value 'on'... (use SSE2=on or SSE2=off)"
-endif
-endif
-
-informSSSE3:
-ifneq ($(SSSE3),off)
-ifneq ($(SSSE3),on)
-ifneq ($(SSSE3),)
-	@echo "Invalid specified SSE option : "$(SSSE3)"."
-	@echo
-	@echo "Possible choices for SSSE3 are 'on' or 'off'"
-	@exit 1
-endif
-	@echo "Setting SSSE3 flag to default value 'on'... (use SSSE3=on or SSSE3=off)"
-endif
-endif
-
-
-# pixman linking
-$(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS)
-	@$(AR) $(PIXMAN_ARFLAGS) -OUT:$@ $^
-
-.PHONY: all informMMX informSSE2 informSSSE3
diff --git a/vendor/pixman/pixman/dither/blue-noise-64x64.h b/vendor/pixman/pixman/dither/blue-noise-64x64.h
deleted file mode 100644
index 93c8805b5..000000000
--- a/vendor/pixman/pixman/dither/blue-noise-64x64.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* WARNING: This file is generated by make-blue-noise.c
- * Please edit that file instead of this one.
- */
-
-#ifndef BLUE_NOISE_64X64_H
-#define BLUE_NOISE_64X64_H
-
-#include <stdint.h>
-
-static const uint16_t dither_blue_noise_64x64[4096] = {
-    3039, 1368, 3169, 103, 2211, 1248, 2981, 668, 2633, 37, 3963, 2903, 384, 2564, 3115, 1973, 3348, 830, 2505, 1293, 3054, 1060, 1505, 3268, 400, 1341, 593, 3802, 3384, 429, 4082, 1411, 2503, 3863, 126, 1292, 1887, 2855, 205, 2094, 2977, 1899, 3924, 356, 3088, 2500, 3942, 1409, 2293, 1734, 3732, 1291, 3227, 277, 2054, 786, 2871, 411, 2425, 1678, 3986, 455, 2879, 2288,
-    388, 1972, 3851, 778, 2768, 3697, 944, 2123, 1501, 3533, 937, 1713, 1381, 3888, 156, 1242, 516, 2888, 1607, 3676, 632, 2397, 3804, 2673, 1898, 3534, 2593, 1777, 1170, 2299, 3013, 1838, 523, 3053, 1647, 3601, 3197, 959, 1520, 3633, 893, 2437, 3367, 2187, 1258, 137, 1965, 401, 3546, 643, 3087, 2498, 733, 2786, 3371, 4053, 1266, 1977, 3663, 183, 2570, 2107, 1183, 3708,
-    907, 2473, 1151, 3363, 1527, 1902, 232, 3903, 3060, 496, 2486, 3206, 2165, 861, 2387, 3653, 2101, 3972, 132, 2162, 3437, 1827, 215, 895, 3114, 271, 969, 2932, 197, 1598, 878, 3696, 1140, 2120, 904, 2431, 302, 3846, 2675, 481, 3187, 66, 1440, 650, 3833, 2826, 3435, 901, 2936, 2111, 250, 1875, 3609, 1174, 1747, 162, 2346, 3420, 913, 3172, 1383, 752, 3298, 1735,
-    3540, 2938, 249, 2324, 526, 3099, 2561, 1324, 2347, 1861, 1200, 3702, 257, 3442, 1514, 2999, 992, 1766, 2735, 1163, 478, 2943, 1279, 3635, 2177, 1464, 3672, 2386, 3871, 3340, 2690, 64, 3489, 2811, 3999, 633, 1948, 1243, 2269, 1807, 1143, 2750, 3729, 1790, 2363, 1053, 1537, 2636, 4065, 1076, 1476, 3869, 450, 2200, 2676, 658, 2979, 1548, 544, 1913, 2838, 3911, 116, 2698,
-    517, 1295, 3997, 1739, 3665, 1083, 3509, 599, 3400, 118, 2956, 720, 2689, 1907, 567, 2523, 284, 3397, 711, 3219, 2450, 3985, 1665, 2549, 562, 3011, 1855, 729, 1355, 528, 1908, 2456, 1384, 337, 1540, 2654, 3138, 3513, 703, 4080, 3314, 2047, 855, 3037, 209, 3317, 577, 1828, 17, 2336, 3193, 2748, 962, 3441, 1450, 3246, 1075, 3878, 2615, 3497, 1033, 2310, 1442, 2183,
-    1654, 3254, 2061, 738, 2832, 148, 2030, 1670, 909, 3850, 2109, 1533, 4046, 1085, 3098, 3897, 1378, 2248, 3829, 1495, 1966, 23, 797, 3427, 1124, 4057, 95, 2787, 2190, 3074, 3950, 742, 3194, 1999, 3386, 1113, 16, 1657, 2804, 201, 1543, 383, 2559, 1325, 3604, 2068, 2493, 3771, 1284, 3460, 710, 1716, 2447, 80, 3811, 2032, 347, 2227, 15, 1689, 397, 3084, 662, 3798,
-    973, 43, 2608, 3143, 1459, 2423, 4066, 2770, 3191, 1283, 2630, 314, 3235, 2289, 72, 1822, 2840, 924, 350, 2653, 1057, 3715, 2235, 2775, 346, 2083, 1553, 3292, 1081, 274, 1686, 1188, 2327, 3743, 578, 2234, 3916, 2519, 1011, 3056, 2207, 3438, 3890, 537, 1617, 837, 3094, 373, 2795, 1980, 276, 3951, 1353, 3015, 844, 1724, 3651, 2923, 1316, 4092, 2504, 3627, 1936, 2854,
-    2461, 3929, 1193, 421, 3746, 820, 1180, 286, 2261, 532, 3625, 1812, 802, 1327, 3527, 670, 3730, 2025, 3124, 3565, 529, 2960, 1769, 1390, 3196, 2494, 3756, 796, 3618, 2602, 3463, 2847, 166, 953, 1745, 2900, 438, 2070, 1418, 3741, 639, 1205, 1891, 2882, 2282, 4012, 1182, 1696, 3630, 951, 2904, 2170, 3530, 375, 2320, 2742, 1132, 701, 3216, 2023, 847, 1230, 310, 3431,
-    770, 1961, 3531, 1702, 2181, 3370, 1877, 3072, 1571, 3389, 1071, 2415, 3782, 2803, 1610, 2454, 1211, 182, 1655, 2322, 1282, 3372, 287, 3935, 704, 1232, 415, 1910, 2286, 1399, 556, 1964, 4068, 2444, 3605, 1272, 3345, 816, 3526, 256, 2402, 2777, 955, 345, 3289, 111, 2727, 635, 2396, 1488, 3331, 600, 1032, 1575, 4026, 515, 3507, 2433, 1605, 460, 3364, 2783, 1810, 1397,
-    2334, 223, 2945, 688, 2533, 99, 2705, 624, 3944, 2073, 46, 2978, 508, 2132, 269, 3173, 3453, 2631, 4076, 694, 1892, 2586, 972, 2178, 3470, 1695, 2849, 3141, 77, 3884, 994, 3029, 1536, 673, 3083, 124, 2583, 1722, 2821, 1944, 4027, 1661, 3176, 3728, 1337, 1813, 3503, 2035, 3930, 157, 2537, 1865, 3096, 2646, 1941, 3252, 1449, 135, 2836, 3758, 2139, 84, 3678, 3106,
-    3862, 1545, 3307, 1320, 3955, 1031, 3664, 1306, 2460, 776, 1487, 3294, 1187, 3990, 1903, 1021, 549, 1484, 943, 3027, 97, 3853, 1499, 2880, 198, 2575, 3995, 1089, 1587, 2475, 3282, 339, 2657, 1158, 2105, 1493, 3943, 580, 3232, 1287, 846, 48, 2480, 2112, 771, 2534, 459, 3134, 850, 1298, 3790, 325, 3652, 1249, 193, 940, 2202, 3895, 1829, 911, 1366, 2577, 1069, 534,
-    2104, 1009, 2667, 392, 1983, 2917, 1645, 324, 3439, 2869, 3705, 1767, 2592, 756, 2916, 3683, 2276, 2850, 2053, 3594, 2403, 3181, 634, 3699, 1933, 906, 519, 2150, 3673, 764, 1770, 2220, 3795, 3336, 502, 3547, 2339, 1110, 301, 2210, 3354, 3643, 569, 1518, 2940, 3973, 1138, 1613, 2773, 2127, 2983, 1671, 769, 2161, 3800, 2730, 3127, 1179, 533, 3259, 2284, 4014, 1651, 2820,
-    3566, 653, 1839, 3455, 2399, 789, 3149, 2244, 1863, 1099, 474, 2307, 158, 3541, 1312, 1711, 0, 3902, 360, 1629, 1091, 395, 1781, 1191, 2374, 3353, 1419, 3225, 206, 2931, 3553, 1046, 54, 1646, 2470, 910, 1860, 3137, 3770, 2635, 1562, 2809, 1215, 3788, 222, 2199, 3335, 67, 3606, 524, 1001, 3309, 2410, 3473, 591, 1619, 291, 2502, 3629, 2891, 335, 741, 3378, 168,
-    2384, 3129, 4051, 22, 1444, 3613, 543, 3893, 186, 2665, 4062, 933, 3058, 2142, 449, 2711, 3224, 849, 1330, 3349, 2195, 2670, 3484, 2993, 32, 3774, 2722, 1859, 2548, 1268, 583, 2027, 3165, 2807, 4029, 227, 2897, 1434, 721, 1816, 195, 905, 2066, 3258, 1754, 970, 2674, 1880, 2338, 3915, 1485, 2660, 14, 1313, 2914, 2046, 4074, 791, 1917, 1301, 1725, 2687, 2019, 1443,
-    418, 1186, 1664, 2859, 1049, 2056, 2741, 1226, 1589, 3186, 2042, 1377, 3449, 1574, 3941, 1063, 1930, 2501, 3751, 2930, 671, 4031, 888, 2081, 1544, 684, 1117, 351, 4052, 1698, 2393, 3881, 1439, 785, 1277, 2013, 3488, 441, 2459, 3980, 3061, 3481, 2543, 419, 3020, 609, 3515, 1350, 799, 2878, 348, 2034, 3966, 1824, 950, 3281, 1394, 2239, 3452, 55, 3922, 3119, 892, 3785,
-    3023, 2140, 782, 2492, 3817, 241, 3355, 2424, 856, 3639, 612, 2556, 245, 2858, 705, 2316, 3562, 495, 1748, 128, 1912, 1454, 280, 2552, 3905, 3130, 2274, 3472, 834, 3055, 240, 2692, 471, 2272, 3301, 2632, 1080, 3693, 2136, 1029, 1364, 590, 1611, 4067, 1190, 2360, 3827, 261, 3180, 1768, 3471, 1103, 3003, 520, 3674, 151, 2571, 555, 3033, 982, 2353, 504, 1259, 2555,
-    149, 3889, 3380, 493, 3178, 1681, 663, 1924, 2990, 49, 1792, 3861, 1192, 1987, 3273, 297, 1457, 3043, 1177, 2292, 3249, 2829, 3682, 1154, 1758, 428, 2872, 1993, 1500, 3703, 1129, 3421, 1840, 3754, 163, 659, 1733, 3182, 38, 2875, 1957, 3614, 2237, 78, 1873, 2801, 1513, 2121, 1074, 2516, 667, 3710, 1429, 2430, 2088, 2830, 1072, 3557, 1531, 2733, 1955, 3286, 3590, 1826,
-    2778, 1068, 1932, 1452, 2279, 1185, 3564, 3952, 1391, 2726, 3313, 2331, 870, 3709, 1674, 2772, 4085, 808, 2596, 3848, 927, 538, 2335, 3334, 773, 3597, 1347, 109, 2663, 608, 2108, 2994, 936, 1524, 2922, 3968, 2422, 1467, 845, 3870, 321, 2704, 1073, 3308, 3680, 823, 430, 3375, 4030, 112, 2171, 2695, 267, 3374, 731, 1627, 3919, 1871, 352, 3839, 1370, 234, 794, 1532,
-    3245, 647, 3575, 74, 3045, 2766, 285, 2174, 498, 1059, 1551, 385, 3125, 2598, 143, 1128, 2095, 3395, 318, 1590, 3524, 1345, 1969, 242, 2759, 2092, 947, 3926, 3244, 2356, 1658, 6, 3593, 2554, 1172, 1995, 371, 2755, 3417, 2294, 1570, 3164, 748, 2517, 1401, 3111, 2420, 1662, 2910, 1276, 3276, 854, 1804, 4000, 1253, 2987, 229, 2344, 3184, 649, 2196, 2921, 4095, 2389,
-    1289, 2193, 2579, 4023, 757, 1858, 986, 3199, 2514, 3475, 4021, 2154, 651, 1432, 3468, 2404, 574, 1799, 3105, 2145, 86, 2614, 3218, 1565, 4088, 2481, 3079, 1815, 323, 1212, 3837, 759, 2159, 435, 3223, 784, 3659, 1114, 1888, 550, 1221, 3786, 1803, 499, 2117, 185, 3763, 942, 589, 2001, 3838, 1483, 3154, 2256, 468, 2544, 3403, 898, 1208, 2610, 3622, 967, 1929, 378,
-    3781, 220, 1656, 1115, 3347, 2428, 3822, 1577, 712, 1959, 110, 2765, 1762, 3854, 979, 2928, 3714, 1371, 746, 3969, 2884, 975, 3779, 641, 1142, 159, 1460, 702, 3485, 2866, 2495, 3330, 1305, 3937, 1635, 2229, 2962, 146, 4055, 3091, 2417, 100, 3508, 2933, 4006, 1167, 1920, 2760, 3552, 2545, 433, 2845, 142, 1056, 1886, 3616, 1435, 2099, 3803, 1749, 27, 1446, 3350, 2843,
-    884, 3310, 2948, 2103, 447, 1351, 187, 2895, 3655, 1256, 3036, 932, 3325, 2257, 451, 1915, 40, 2780, 2438, 1112, 1814, 423, 2290, 1905, 2898, 3419, 2306, 3760, 1938, 486, 1019, 1791, 3010, 2628, 203, 3408, 1269, 2507, 1606, 862, 2779, 2078, 952, 1529, 2638, 708, 3332, 1413, 2, 1726, 1156, 3500, 2392, 3791, 3076, 812, 107, 2861, 501, 3050, 3487, 2455, 594, 1731,
-    2685, 1498, 680, 3908, 2621, 3529, 1786, 2236, 342, 2569, 1526, 3722, 230, 1290, 3203, 3947, 1609, 3516, 467, 3267, 3685, 1461, 3140, 3569, 367, 1759, 928, 2754, 1332, 2219, 4034, 260, 655, 1984, 978, 3814, 617, 2086, 3525, 279, 3841, 1373, 3361, 319, 2251, 3066, 407, 2382, 3918, 3133, 2168, 762, 1523, 507, 2641, 1677, 4025, 2413, 1584, 793, 2049, 1109, 3962, 2218,
-    1194, 3692, 266, 1687, 981, 3103, 740, 3983, 1005, 3434, 570, 2383, 1942, 2718, 676, 2462, 1007, 2089, 1308, 2222, 233, 2568, 829, 1241, 2669, 3987, 514, 3303, 69, 3142, 1603, 3560, 2295, 3288, 1497, 2696, 1764, 2865, 1058, 3271, 1914, 477, 2529, 3927, 1736, 1273, 3752, 2029, 1012, 565, 2798, 4078, 1949, 3305, 1175, 2179, 380, 3366, 1195, 3849, 2637, 416, 2959, 125,
-    3396, 2467, 2036, 3234, 2340, 68, 2819, 1436, 2011, 3139, 1704, 4073, 860, 3582, 1468, 2969, 211, 3157, 4056, 866, 2935, 2000, 3923, 31, 2157, 1477, 2429, 1147, 3792, 2557, 774, 2802, 1153, 3747, 464, 3192, 42, 3904, 539, 1474, 2283, 803, 2876, 1061, 75, 3477, 747, 2893, 1538, 3626, 251, 1322, 2506, 189, 2791, 3667, 939, 2991, 1971, 175, 3195, 1416, 3648, 1857,
-    3052, 454, 851, 3789, 1271, 1906, 3694, 2484, 406, 2757, 26, 1189, 2909, 296, 2215, 3784, 1864, 637, 2715, 1673, 3445, 581, 1572, 3059, 3469, 761, 2984, 1737, 2058, 440, 1414, 1921, 121, 2527, 894, 2223, 1302, 2377, 3077, 2666, 3759, 3198, 1811, 3661, 2166, 2731, 1883, 359, 3285, 2458, 1805, 3459, 926, 3834, 675, 1893, 1496, 2612, 657, 3523, 1763, 2354, 564, 961,
-    1367, 3977, 1588, 2714, 322, 3446, 1088, 625, 3887, 1354, 3535, 2090, 3316, 1760, 1127, 483, 3491, 1421, 2301, 94, 1202, 3740, 2311, 1014, 1878, 3836, 180, 3412, 991, 2868, 3953, 3450, 3081, 1632, 4071, 1882, 3543, 726, 1719, 179, 1171, 364, 1420, 622, 3090, 1490, 946, 4007, 2212, 1102, 619, 2739, 2189, 1669, 2937, 3426, 39, 3940, 2191, 1264, 887, 4091, 2792, 2135,
-    4, 2883, 2281, 631, 3044, 1641, 2232, 3243, 1773, 2319, 827, 2591, 629, 3938, 2426, 3222, 2629, 1044, 3879, 3293, 1952, 2749, 275, 2590, 472, 1372, 2496, 660, 3669, 2264, 208, 915, 2167, 561, 2828, 307, 3265, 1104, 3964, 2155, 3425, 1951, 4077, 2391, 283, 3387, 2581, 115, 1415, 3069, 3896, 141, 3158, 1214, 442, 2405, 1349, 3085, 425, 2528, 3002, 312, 1602, 3588,
-    1137, 3323, 1963, 1002, 3578, 2521, 127, 925, 2970, 273, 3737, 1573, 167, 2863, 1509, 800, 147, 2059, 2942, 409, 921, 3151, 1451, 3909, 3333, 2844, 2096, 1512, 3136, 1210, 1798, 2709, 1331, 3586, 1034, 1521, 2441, 2926, 488, 2585, 775, 3031, 2693, 879, 3602, 1173, 2028, 3654, 2781, 841, 1975, 1507, 3646, 768, 3991, 2012, 996, 3544, 1666, 3810, 1990, 3360, 753, 2597,
-    3736, 304, 1473, 3828, 485, 1334, 4008, 2072, 3495, 1136, 2806, 2004, 3236, 1010, 2130, 3819, 1750, 3567, 644, 2515, 1794, 3636, 698, 2137, 1162, 832, 3761, 326, 2613, 513, 3302, 3820, 357, 3163, 2259, 3733, 101, 1922, 1386, 3587, 1640, 28, 1286, 2141, 1761, 2918, 693, 1639, 457, 3250, 2434, 365, 2599, 1729, 3284, 2643, 306, 2793, 689, 1090, 104, 1309, 2305, 1831,
-    2776, 859, 2446, 2915, 1778, 3337, 2677, 614, 1508, 2409, 469, 4033, 1321, 3563, 402, 3131, 2720, 1093, 1569, 4042, 1229, 2277, 216, 3046, 1817, 57, 3006, 1684, 4059, 2016, 795, 2440, 1652, 1960, 610, 2763, 920, 3864, 3110, 1026, 2326, 3762, 3233, 521, 3856, 173, 2457, 3939, 2138, 1262, 3572, 989, 3021, 2238, 119, 1445, 3832, 1809, 2297, 3467, 2700, 3684, 3102, 394,
-    4036, 2050, 3256, 89, 2198, 1079, 248, 1845, 3805, 3104, 880, 1779, 2688, 717, 2373, 1375, 262, 2249, 3071, 13, 2813, 3429, 1600, 3984, 2416, 3603, 1299, 2298, 998, 3492, 1393, 2951, 10, 4009, 1247, 3462, 1679, 2204, 414, 2736, 316, 1894, 2816, 1050, 3373, 1462, 3107, 817, 3464, 21, 1835, 4070, 568, 1178, 3718, 875, 3168, 466, 2974, 1458, 2084, 616, 1564, 1018,
-    1693, 546, 1244, 3899, 716, 3160, 3608, 2877, 1220, 334, 3443, 2270, 44, 3000, 1843, 3928, 3405, 766, 3686, 2040, 587, 993, 2647, 387, 930, 2753, 630, 3274, 150, 2808, 453, 3638, 1092, 2352, 3030, 239, 2562, 700, 3240, 1257, 4016, 730, 1515, 2203, 2551, 417, 1866, 1123, 2348, 2902, 1550, 2678, 2075, 3238, 1630, 2531, 2115, 1255, 4054, 840, 290, 3874, 2477, 3399,
-    2250, 3577, 2817, 1626, 2576, 1356, 2315, 792, 2087, 2618, 1612, 3855, 1263, 3637, 1036, 494, 1535, 2553, 1198, 1715, 3867, 3170, 1359, 1954, 3483, 1539, 2069, 3886, 1772, 2487, 1534, 2045, 3242, 806, 1578, 2018, 3948, 1423, 3596, 2076, 2466, 3424, 139, 3688, 871, 4049, 2852, 3342, 547, 3719, 327, 852, 3505, 207, 2794, 542, 3600, 45, 2411, 3324, 1788, 3012, 1235, 61,
-    2655, 917, 253, 1986, 3738, 313, 1706, 4072, 120, 3229, 957, 597, 2024, 3262, 2453, 2857, 2002, 3190, 210, 2784, 2206, 300, 2400, 3766, 553, 3152, 218, 1150, 2988, 883, 3753, 627, 2664, 3831, 437, 3385, 1008, 2957, 60, 1636, 891, 2899, 1776, 3062, 1315, 2026, 194, 1643, 2079, 1296, 3201, 2465, 1379, 1927, 3898, 1125, 1847, 2846, 1552, 1028, 2725, 2169, 787, 3202,
-    1441, 3982, 3032, 1052, 3251, 605, 2639, 3073, 1431, 3642, 2329, 2949, 341, 1634, 833, 129, 4020, 916, 3571, 669, 1506, 3411, 821, 2856, 1207, 2337, 2683, 3448, 340, 2214, 3128, 235, 1738, 1288, 2833, 2419, 606, 1884, 2668, 552, 3765, 1176, 399, 2302, 596, 3591, 2634, 767, 3845, 2767, 995, 3967, 491, 3057, 814, 2300, 3422, 691, 3797, 254, 3645, 509, 3478, 1836,
-    2119, 475, 2445, 1525, 2175, 3539, 914, 1926, 473, 1157, 1800, 3971, 2701, 3739, 2129, 3486, 1333, 1784, 2366, 2982, 1070, 4089, 1802, 73, 1642, 3958, 835, 1837, 1480, 4043, 1217, 2469, 3416, 2113, 88, 3668, 1240, 3255, 3920, 2355, 3167, 2003, 2645, 3936, 3228, 1592, 1144, 3474, 2394, 79, 1820, 2241, 1594, 3656, 2584, 153, 1448, 3034, 2005, 2511, 1692, 1335, 3913, 217,
-    2822, 3391, 745, 3813, 192, 1274, 2941, 3847, 2489, 3440, 744, 161, 1422, 1086, 572, 3004, 2617, 338, 3807, 2031, 236, 2472, 3065, 2098, 3358, 362, 2163, 3574, 497, 2788, 1970, 948, 3885, 685, 3100, 1712, 2228, 292, 1408, 1016, 164, 3537, 1417, 941, 34, 2172, 3001, 358, 1491, 3147, 699, 3356, 258, 1149, 2946, 1787, 3931, 382, 1146, 3291, 818, 2890, 2379, 1096,
-    3679, 1328, 1901, 3162, 2747, 1730, 2253, 5, 1556, 2818, 2093, 3166, 2522, 3410, 2287, 1701, 956, 3237, 620, 1596, 3300, 1307, 511, 3701, 1020, 2939, 1362, 2532, 3208, 749, 3641, 160, 1522, 2624, 1095, 4086, 826, 2841, 3583, 2173, 1727, 723, 2925, 1911, 2482, 3726, 863, 1962, 4028, 1111, 2835, 3773, 2449, 2022, 582, 3278, 923, 2619, 2152, 4039, 92, 1934, 3145, 677,
-    2530, 53, 2303, 1003, 458, 3989, 739, 3321, 1064, 369, 3556, 877, 1900, 426, 3876, 1, 3617, 2106, 1197, 2805, 3634, 857, 2706, 1504, 2418, 682, 3868, 20, 1139, 1688, 2333, 3311, 2907, 1945, 265, 2385, 3433, 1601, 636, 2620, 3095, 4044, 386, 3382, 1184, 527, 2814, 3414, 2342, 465, 1889, 1343, 874, 3479, 1502, 2233, 3689, 1385, 559, 2745, 1463, 3465, 376, 1718,
-    3217, 4045, 1580, 3612, 2525, 1228, 3018, 1958, 3725, 2358, 1361, 3996, 1581, 3063, 1224, 2737, 1475, 2442, 3946, 191, 1796, 2128, 3975, 134, 1916, 3318, 1597, 2071, 3749, 2672, 403, 1278, 602, 3745, 3220, 1374, 445, 2064, 3830, 243, 1252, 2390, 1563, 2724, 3875, 1818, 1346, 165, 1650, 3264, 2680, 117, 2998, 4081, 343, 2799, 9, 3122, 1743, 3724, 1040, 2231, 3842, 1209,
-    900, 398, 2851, 697, 1797, 3482, 293, 2679, 1649, 566, 2954, 91, 2697, 714, 2060, 3211, 781, 480, 3040, 1038, 2611, 666, 2989, 3458, 1201, 2796, 548, 2975, 839, 3121, 1850, 4001, 2208, 1631, 790, 2558, 2972, 1148, 3213, 1849, 3624, 971, 2102, 108, 772, 3101, 2589, 3777, 1042, 656, 3907, 2097, 1615, 2540, 805, 1935, 1231, 3494, 2451, 268, 2995, 750, 2682, 2020,
-    3024, 1392, 2124, 3279, 106, 2217, 1387, 822, 3214, 3825, 2160, 1000, 2395, 3691, 228, 4038, 1872, 3413, 1608, 2225, 3536, 303, 1653, 886, 2541, 224, 4037, 2252, 1428, 172, 3504, 958, 2848, 113, 3628, 1834, 3979, 19, 2317, 779, 2797, 518, 3174, 3549, 1482, 2266, 444, 2014, 3555, 2439, 1213, 3113, 535, 1135, 3204, 3858, 2309, 931, 623, 2009, 3359, 1566, 140, 3550,
-    1808, 3872, 2488, 1152, 3764, 2892, 3960, 2412, 353, 1223, 1825, 3444, 3116, 1717, 1082, 2313, 1280, 2661, 82, 3852, 1389, 3200, 2330, 3812, 2038, 3581, 1728, 1039, 3339, 2427, 586, 2580, 1238, 3328, 2280, 1047, 595, 2662, 1363, 3338, 1620, 3934, 2497, 1881, 1054, 3954, 3215, 864, 2887, 1801, 320, 3519, 2378, 3704, 1753, 424, 2958, 1660, 4005, 2601, 1116, 3912, 2381, 573,
-    2740, 200, 828, 1667, 432, 1931, 1035, 1616, 3598, 2640, 728, 264, 1437, 557, 3501, 2966, 372, 3734, 974, 1978, 758, 2719, 1145, 452, 1433, 725, 2681, 408, 3843, 1918, 1547, 3906, 1996, 503, 1456, 3019, 3493, 1700, 3742, 355, 2134, 176, 1311, 615, 2867, 315, 1680, 1314, 8, 3297, 1494, 783, 1950, 83, 2656, 1382, 3561, 138, 2834, 1404, 330, 1904, 3156, 1027,
-    1357, 3381, 3041, 3666, 2729, 734, 3415, 177, 3051, 2021, 4079, 2823, 3775, 2186, 2616, 869, 1668, 3148, 2367, 3315, 393, 4075, 1870, 2920, 3343, 2362, 3188, 1303, 2782, 825, 3171, 259, 2905, 3717, 2538, 184, 2074, 838, 2860, 2407, 1024, 3496, 3008, 3706, 1985, 2349, 3623, 2582, 4058, 2184, 2694, 3873, 2964, 990, 3346, 690, 2033, 1066, 2201, 3490, 2971, 718, 3700, 2188,
-    4061, 391, 1989, 2325, 1430, 3150, 2125, 2526, 592, 1403, 976, 2351, 1165, 1851, 114, 3921, 2063, 613, 1358, 2785, 1623, 2254, 25, 3542, 1045, 246, 1852, 3554, 87, 2243, 3615, 1169, 727, 1705, 968, 3957, 3185, 1251, 500, 4063, 1751, 2622, 842, 1519, 90, 3393, 819, 490, 1874, 999, 571, 1275, 2271, 1586, 4040, 2448, 3126, 3731, 436, 885, 1708, 2421, 24, 1599,
-    889, 2563, 1199, 645, 70, 4013, 1237, 3723, 1694, 3499, 3, 3266, 484, 2997, 3390, 1233, 2842, 3687, 152, 3480, 1084, 3698, 881, 2490, 1542, 3992, 2209, 692, 1690, 3022, 1470, 2625, 2114, 3512, 2359, 381, 2684, 1897, 3368, 1395, 3080, 289, 2065, 3981, 2758, 1141, 3097, 1472, 2870, 3352, 3707, 225, 3159, 505, 1895, 214, 1222, 1774, 2686, 3978, 3275, 1196, 3518, 2825,
-    3270, 1720, 3796, 3466, 2650, 1841, 298, 899, 2862, 2091, 2671, 1744, 3735, 801, 1560, 349, 2262, 903, 1833, 2524, 512, 3117, 1793, 2827, 476, 3038, 1216, 2550, 3826, 980, 431, 4048, 35, 2992, 1265, 1595, 765, 3675, 76, 2247, 696, 3456, 1254, 2452, 664, 1757, 2133, 3750, 145, 2332, 1554, 1981, 3580, 2712, 868, 3640, 2919, 638, 2275, 1427, 309, 2595, 2006, 492,
-    2226, 178, 2911, 836, 1528, 3028, 2240, 3327, 404, 3970, 707, 1294, 2464, 2131, 4032, 2600, 3319, 1406, 2913, 3974, 2156, 1425, 221, 3877, 2017, 811, 3662, 272, 3287, 1988, 2408, 3357, 1746, 598, 3239, 3823, 2182, 2934, 1078, 2604, 3840, 1697, 2906, 413, 3210, 3880, 331, 2644, 1260, 848, 3042, 2535, 1077, 1438, 3261, 2365, 1561, 3799, 85, 3082, 1876, 674, 3932, 1101,
-    3644, 1344, 1943, 2401, 390, 3835, 1048, 2572, 1541, 1133, 3075, 3584, 308, 2889, 1065, 1869, 601, 3783, 282, 1181, 736, 3312, 2368, 1126, 3383, 1675, 2734, 1426, 628, 2873, 1317, 843, 2717, 2048, 1004, 2536, 333, 1782, 3295, 1517, 219, 2153, 815, 3502, 1579, 2268, 987, 3409, 1780, 4018, 354, 665, 3914, 47, 1956, 456, 1006, 2010, 3406, 1130, 3621, 2894, 1549, 3092,
-    2485, 640, 3993, 3179, 1270, 3436, 585, 1925, 3757, 2304, 136, 1976, 1486, 646, 3520, 50, 3155, 1637, 2435, 3522, 1937, 2756, 3748, 661, 2224, 58, 3230, 2357, 1830, 3892, 170, 3607, 1447, 3949, 190, 3392, 1336, 584, 4010, 918, 3016, 3670, 1155, 2406, 52, 1304, 3009, 607, 2085, 2699, 3205, 1848, 2291, 3402, 2764, 3865, 3048, 2508, 735, 2710, 443, 2341, 897, 263,
-    1785, 2769, 983, 56, 2197, 1685, 2703, 202, 2944, 810, 3377, 2626, 3787, 3047, 2055, 1236, 2752, 2122, 945, 3093, 96, 1624, 439, 3014, 1388, 4015, 977, 448, 3506, 1098, 2242, 3026, 506, 2361, 2952, 1862, 3619, 2790, 1992, 2483, 525, 1868, 2652, 4093, 1998, 3595, 2478, 3816, 122, 1412, 929, 3716, 1166, 1648, 813, 1300, 199, 1489, 3998, 1771, 1310, 3808, 2052, 3423,
-    434, 3712, 1625, 3558, 2955, 853, 4019, 1348, 3511, 1732, 1246, 487, 934, 1672, 2510, 3965, 788, 3711, 396, 1369, 4090, 1055, 2603, 1879, 3528, 2518, 2067, 3005, 1516, 2588, 751, 1740, 3418, 1131, 1576, 686, 2296, 1118, 18, 3263, 1365, 3401, 294, 737, 3177, 410, 867, 1633, 2963, 3579, 2375, 252, 2881, 479, 2471, 3576, 2180, 3306, 332, 2255, 3035, 41, 2648, 1396,
-    2929, 2230, 1219, 2512, 446, 2008, 3189, 2388, 626, 2164, 2831, 4047, 2376, 174, 3272, 368, 1469, 3226, 2578, 1991, 2874, 2263, 3681, 876, 188, 1239, 683, 3776, 226, 3183, 4083, 2148, 63, 2649, 3859, 299, 3086, 3933, 1585, 2185, 3767, 988, 1707, 2908, 1407, 1844, 2771, 2245, 1161, 560, 1755, 3376, 2051, 4064, 3135, 1832, 652, 2853, 1051, 3649, 760, 3290, 1105, 3945,
-    872, 154, 3207, 713, 3780, 1453, 281, 1087, 3695, 30, 3299, 1919, 1400, 3551, 1119, 1890, 2314, 618, 1703, 3428, 724, 295, 3146, 1557, 3341, 2896, 1683, 2723, 1974, 1017, 541, 1380, 3720, 804, 3280, 2082, 997, 2567, 777, 2961, 213, 2707, 2328, 3632, 1025, 3891, 3304, 255, 4003, 3108, 2587, 1323, 743, 1479, 105, 1013, 3901, 1618, 2044, 2627, 1465, 1846, 576, 1994,
-    2560, 3521, 1742, 2118, 2800, 3404, 1783, 2609, 2968, 1582, 1022, 412, 2713, 687, 2976, 3857, 2761, 3620, 62, 1108, 3844, 1340, 2100, 540, 2345, 3925, 405, 3457, 1319, 2468, 3362, 2815, 1867, 2372, 1281, 1714, 3690, 482, 3498, 1842, 1285, 3994, 558, 2039, 81, 2499, 678, 1481, 1923, 964, 12, 3824, 2980, 2205, 2762, 3432, 2398, 181, 3247, 462, 4094, 2350, 3589, 3089,
-    1555, 1094, 4041, 247, 1267, 908, 3959, 2041, 732, 3860, 2343, 3132, 3769, 2144, 1621, 237, 912, 1329, 3025, 2146, 2642, 1775, 3721, 2746, 1121, 1953, 902, 2285, 130, 3671, 1659, 278, 3153, 522, 2721, 123, 2996, 1466, 2380, 377, 3231, 873, 1510, 3476, 3123, 1250, 2147, 3650, 2839, 3451, 2323, 1122, 3545, 379, 1765, 1218, 603, 3768, 1360, 938, 2885, 133, 1245, 363,
-    2364, 554, 2743, 3344, 2474, 530, 3112, 169, 1297, 3430, 536, 1741, 98, 1043, 2574, 3253, 2246, 1854, 4022, 510, 3283, 204, 858, 3398, 36, 3118, 1478, 3794, 2986, 706, 2176, 922, 3559, 1097, 3976, 3322, 2149, 1160, 2810, 3883, 2007, 2513, 2953, 328, 1721, 3793, 422, 2566, 807, 329, 1638, 1967, 648, 2520, 3727, 3109, 2116, 2927, 2491, 1939, 3365, 1709, 2728, 3815,
-    2037, 3120, 831, 1405, 1896, 3592, 1622, 2369, 2864, 2151, 1107, 2542, 3532, 1410, 3917, 427, 3568, 709, 2509, 1503, 1037, 2973, 2436, 1604, 4035, 2594, 563, 1819, 2659, 1234, 4004, 2565, 1511, 2273, 1823, 336, 882, 3772, 575, 1628, 171, 3570, 1120, 2260, 2716, 935, 3064, 1806, 1342, 3144, 3900, 2744, 3296, 985, 1546, 238, 896, 1663, 305, 3660, 695, 2213, 960, 3407,
-    144, 1795, 3894, 2267, 51, 2708, 1023, 3818, 366, 1821, 4087, 2985, 755, 2057, 2912, 949, 1583, 2774, 231, 3447, 2258, 3866, 1982, 672, 1225, 2077, 3320, 1062, 370, 3241, 1968, 7, 3068, 681, 3631, 2573, 1567, 3175, 2321, 1067, 3070, 722, 1856, 3744, 642, 1471, 4084, 131, 3514, 2443, 531, 1227, 155, 2265, 4024, 2658, 3326, 3910, 1168, 3078, 1530, 3956, 489, 1424,
-    3647, 1203, 420, 2924, 3755, 719, 3248, 1376, 3067, 890, 196, 1559, 3269, 270, 2432, 1885, 3212, 1164, 3778, 1752, 579, 1338, 344, 3585, 3017, 288, 3658, 2371, 3882, 1691, 611, 2789, 3809, 1339, 389, 2950, 2015, 59, 3548, 2751, 2158, 4011, 1352, 29, 3388, 2370, 2812, 1946, 954, 2110, 1558, 2947, 3573, 1909, 1326, 679, 1853, 2312, 551, 2702, 33, 2414, 3209, 2824,
-    2547, 2143, 3379, 966, 1492, 1979, 2479, 463, 2194, 3657, 2738, 2318, 1261, 3713, 604, 4002, 11, 2192, 2967, 919, 2607, 3369, 2837, 1676, 2539, 984, 1568, 93, 2901, 1318, 3538, 1041, 2216, 1756, 3454, 1030, 4050, 1402, 798, 1723, 311, 3277, 2546, 2886, 2043, 461, 1206, 3677, 361, 3260, 3988, 809, 2605, 470, 3007, 3517, 102, 3221, 1398, 2062, 3611, 1134, 1928, 865,
-    4060, 621, 1710, 2606, 3510, 317, 4017, 1682, 3329, 1159, 1940, 654, 3461, 1789, 1015, 2691, 1455, 3599, 374, 1947, 4069, 71, 2126, 763, 3961, 2278, 3161, 1997, 824, 2623, 2080, 244, 3257, 780, 2732, 2308, 545, 3351, 2476, 3806, 1204, 588, 1591, 963, 3610, 1699, 754, 3049, 2651, 1106, 65, 2221, 1644, 3821, 1100, 2463, 1614, 3801, 965, 2965, 715, 3394, 1593, 212,
-};
-
-#endif /* BLUE_NOISE_64X64_H */
diff --git a/vendor/pixman/pixman/dither/make-blue-noise.c b/vendor/pixman/pixman/dither/make-blue-noise.c
deleted file mode 100644
index f9974b4d4..000000000
--- a/vendor/pixman/pixman/dither/make-blue-noise.c
+++ /dev/null
@@ -1,679 +0,0 @@
-/* Blue noise generation using the void-and-cluster method as described in
- *
- *     The void-and-cluster method for dither array generation
- *     Ulichney, Robert A (1993)
- *
- *     http://cv.ulichney.com/papers/1993-void-cluster.pdf
- *
- * Note that running with openmp (-DUSE_OPENMP) will trigger additional
- * randomness due to computing reductions in parallel, and is not recommended
- * unless generating very large dither arrays.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <math.h>
-#include <stdio.h>
-
-/* Booleans and utility functions */
-
-#ifndef TRUE
-#   define TRUE 1
-#endif
-
-#ifndef FALSE
-#   define FALSE 0
-#endif
-
-typedef int bool_t;
-
-int
-imin (int x, int y)
-{
-    return x < y ? x : y;
-}
-
-/* Memory allocation */
-void *
-malloc_abc (unsigned int a, unsigned int b, unsigned int c)
-{
-    if (a >= INT32_MAX / b)
-	return NULL;
-    else if (a * b >= INT32_MAX / c)
-	return NULL;
-    else
-	return malloc (a * b * c);
-}
-
-/* Random number generation */
-typedef uint32_t xorwow_state_t[5];
-
-uint32_t
-xorwow_next (xorwow_state_t *state)
-{
-    uint32_t s = (*state)[0],
-    t = (*state)[3];
-    (*state)[3] = (*state)[2];
-    (*state)[2] = (*state)[1];
-    (*state)[1] = s;
-
-    t ^= t >> 2;
-    t ^= t << 1;
-    t ^= s ^ (s << 4);
-
-    (*state)[0] = t;
-    (*state)[4] += 362437;
-
-    return t + (*state)[4];
-}
-
-float
-xorwow_float (xorwow_state_t *s)
-{
-    return (xorwow_next (s) >> 9) / (float)((1 << 23) - 1);
-}
-
-/* Floating point matrices
- *
- * Used to cache the cluster sizes.
- */
-typedef struct matrix_t {
-    int width;
-    int height;
-    float *buffer;
-} matrix_t;
-
-bool_t
-matrix_init (matrix_t *matrix, int width, int height)
-{
-    float *buffer;
-
-    if (!matrix)
-	return FALSE;
-
-    buffer = malloc_abc (width, height, sizeof (float));
-
-    if (!buffer)
-	return FALSE;
-
-    matrix->buffer = buffer;
-    matrix->width  = width;
-    matrix->height = height;
-
-    return TRUE;
-}
-
-bool_t
-matrix_copy (matrix_t *dst, matrix_t const *src)
-{
-    float *srcbuf = src->buffer,
-	  *srcend = src->buffer + src->width * src->height,
-	  *dstbuf = dst->buffer;
-
-    if (dst->width != src->width || dst->height != src->height)
-	return FALSE;
-
-    while (srcbuf < srcend)
-	*dstbuf++ = *srcbuf++;
-
-    return TRUE;
-}
-
-float *
-matrix_get (matrix_t *matrix, int x, int y)
-{
-    return &matrix->buffer[y * matrix->width + x];
-}
-
-void
-matrix_destroy (matrix_t *matrix)
-{
-    free (matrix->buffer);
-}
-
-/* Binary patterns */
-typedef struct pattern_t {
-    int width;
-    int height;
-    bool_t *buffer;
-} pattern_t;
-
-bool_t
-pattern_init (pattern_t *pattern, int width, int height)
-{
-    bool_t *buffer;
-
-    if (!pattern)
-	return FALSE;
-
-    buffer = malloc_abc (width, height, sizeof (bool_t));
-
-    if (!buffer)
-	return FALSE;
-
-    pattern->buffer = buffer;
-    pattern->width  = width;
-    pattern->height = height;
-
-    return TRUE;
-}
-
-bool_t
-pattern_copy (pattern_t *dst, pattern_t const *src)
-{
-    bool_t *srcbuf = src->buffer,
-	   *srcend = src->buffer + src->width * src->height,
-	   *dstbuf = dst->buffer;
-
-    if (dst->width != src->width || dst->height != src->height)
-	return FALSE;
-
-    while (srcbuf < srcend)
-	*dstbuf++ = *srcbuf++;
-
-    return TRUE;
-}
-
-bool_t *
-pattern_get (pattern_t *pattern, int x, int y)
-{
-    return &pattern->buffer[y * pattern->width + x];
-}
-
-void
-pattern_fill_white_noise (pattern_t *pattern, float fraction,
-			  xorwow_state_t *s)
-{
-    bool_t *buffer = pattern->buffer;
-    bool_t *end    = buffer + (pattern->width * pattern->height);
-
-    while (buffer < end)
-	*buffer++ = xorwow_float (s) < fraction;
-}
-
-void
-pattern_destroy (pattern_t *pattern)
-{
-    free (pattern->buffer);
-}
-
-/* Dither arrays */
-typedef struct array_t {
-    int width;
-    int height;
-    uint32_t *buffer;
-} array_t;
-
-bool_t
-array_init (array_t *array, int width, int height)
-{
-    uint32_t *buffer;
-
-    if (!array)
-	return FALSE;
-
-    buffer = malloc_abc (width, height, sizeof (uint32_t));
-
-    if (!buffer)
-	return FALSE;
-
-    array->buffer = buffer;
-    array->width  = width;
-    array->height = height;
-
-    return TRUE;
-}
-
-uint32_t *
-array_get (array_t *array, int x, int y)
-{
-    return &array->buffer[y * array->width + x];
-}
-
-bool_t
-array_save_ppm (array_t *array, const char *filename)
-{
-    FILE *f = fopen(filename, "wb");
-
-    int i   = 0;
-    int bpp = 2;
-    uint8_t buffer[1024];
-
-    if (!f)
-	return FALSE;
-
-    if (array->width * array->height - 1 < 256)
-	bpp = 1;
-
-    fprintf(f, "P5 %d %d %d\n", array->width, array->height,
-	    array->width * array->height - 1);
-    while (i < array->width * array->height)
-    {
-	    int j = 0;
-	    for (; j < 1024 / bpp && j < array->width * array->height; ++j)
-	    {
-		    uint32_t v = array->buffer[i + j];
-		    if (bpp == 2)
-		    {
-			buffer[2 * j] = v & 0xff;
-			buffer[2 * j + 1] = (v & 0xff00) >> 8;
-		    } else {
-			buffer[j] = v;
-		    }
-	    }
-
-	    fwrite((void *)buffer, bpp, j, f);
-	    i += j;
-    }
-
-    if (fclose(f) != 0)
-	return FALSE;
-
-    return TRUE;
-}
-
-bool_t
-array_save (array_t *array, const char *filename)
-{
-    int x, y;
-    FILE *f = fopen(filename, "wb");
-
-    if (!f)
-	return FALSE;
-
-    fprintf (f, 
-"/* WARNING: This file is generated by make-blue-noise.c\n"
-" * Please edit that file instead of this one.\n"
-" */\n"
-"\n"
-"#ifndef BLUE_NOISE_%dX%d_H\n"
-"#define BLUE_NOISE_%dX%d_H\n"
-"\n"
-"#include <stdint.h>\n"
-"\n", array->width, array->height, array->width, array->height);
-
-    fprintf (f, "static const uint16_t dither_blue_noise_%dx%d[%d] = {\n",
-	     array->width, array->height, array->width * array->height);
-
-    for (y = 0; y < array->height; ++y)
-    {
-	fprintf (f, "    ");
-	for (x = 0; x < array->width; ++x)
-	{
-	    if (x != 0)
-		fprintf (f, ", ");
-
-	    fprintf (f, "%d", *array_get (array, x, y));
-	}
-
-	fprintf (f, ",\n");
-    }
-    fprintf (f, "};\n");
-
-    fprintf (f, "\n#endif /* BLUE_NOISE_%dX%d_H */\n",
-	     array->width, array->height);
-
-    if (fclose(f) != 0)
-	return FALSE;
-
-    return TRUE;
-}
-
-void
-array_destroy (array_t *array)
-{
-    free (array->buffer);
-}
-
-/* Dither array generation */
-bool_t
-compute_cluster_sizes (pattern_t *pattern, matrix_t *matrix)
-{
-    int width  = pattern->width,
-	height = pattern->height;
-
-    if (matrix->width != width || matrix->height != height)
-	return FALSE;
-
-    int px, py, qx, qy, dx, dy;
-    float tsqsi = 2.f * 1.5f * 1.5f;
-
-#ifdef USE_OPENMP
-#pragma omp parallel for default (none) \
-    private (py, px, qy, qx, dx, dy) \
-    shared (height, width, pattern, matrix, tsqsi)
-#endif
-    for (py = 0; py < height; ++py)
-    {
-	for (px = 0; px < width; ++px)
-	{
-	    bool_t pixel = *pattern_get (pattern, px, py);
-	    float dist   = 0.f;
-
-	    for (qx = 0; qx < width; ++qx)
-	    {
-		dx = imin (abs (qx - px), width - abs (qx - px));
-		dx = dx * dx;
-
-		for (qy = 0; qy < height; ++qy)
-		{
-		    dy = imin (abs (qy - py), height - abs (qy - py));
-		    dy = dy * dy;
-
-		    dist += (pixel == *pattern_get (pattern, qx, qy))
-			* expf (- (dx + dy) / tsqsi);
-		}
-	    }
-
-	    *matrix_get (matrix, px, py) = dist;
-	}
-    }
-
-    return TRUE;
-}
-
-bool_t
-swap_pixel (pattern_t *pattern, matrix_t *matrix, int x, int y)
-{
-    int width  = pattern->width,
-	height = pattern->height;
-
-    bool_t new;
-
-    float f,
-          dist  = 0.f,
-	  tsqsi = 2.f * 1.5f * 1.5f;
-
-    int px, py, dx, dy;
-    bool_t b;
-
-    new = !*pattern_get (pattern, x, y);
-    *pattern_get (pattern, x, y) = new;
-
-    if (matrix->width != width || matrix->height != height)
-	return FALSE;
-
-
-#ifdef USE_OPENMP
-#pragma omp parallel for reduction (+:dist) default (none) \
-    private (px, py, dx, dy, b, f) \
-    shared (x, y, width, height, pattern, matrix, new, tsqsi)
-#endif
-    for (py = 0; py < height; ++py)
-    {
-	dy = imin (abs (py - y), height - abs (py - y));
-	dy = dy * dy;
-
-	for (px = 0; px < width; ++px)
-	{
-	    dx = imin (abs (px - x), width - abs (px - x));
-	    dx = dx * dx;
-
-	    b = (*pattern_get (pattern, px, py) == new);
-	    f = expf (- (dx + dy) / tsqsi);
-	    *matrix_get (matrix, px, py) += (2 * b - 1) * f;
-
-	    dist += b * f;
-	}
-    }
-
-    *matrix_get (matrix, x, y) = dist;
-    return TRUE;
-}
-
-void
-largest_cluster (pattern_t *pattern, matrix_t *matrix,
-		 bool_t pixel, int *xmax, int *ymax)
-{
-    int width       = pattern->width,
-	height      = pattern->height;
-
-    int   x, y;
-
-    float vmax = -INFINITY;
-
-#ifdef USE_OPENMP
-#pragma omp parallel default (none) \
-    private (x, y) \
-    shared (height, width, pattern, matrix, pixel, xmax, ymax, vmax)
-#endif
-    {
-	int xbest = -1,
-	    ybest = -1;
-
-#ifdef USE_OPENMP
-	float vbest = -INFINITY;
-
-#pragma omp for reduction (max: vmax) collapse (2)
-#endif
-	for (y = 0; y < height; ++y)
-	{
-	    for (x = 0; x < width; ++x)
-	    {
-		if (*pattern_get (pattern, x, y) != pixel)
-		    continue;
-
-		if (*matrix_get (matrix, x, y) > vmax)
-		{
-		    vmax = *matrix_get (matrix, x, y);
-#ifdef USE_OPENMP
-		    vbest = vmax;
-#endif
-		    xbest = x;
-		    ybest = y;
-		}
-	    }
-	}
-
-#ifdef USE_OPENMP
-#pragma omp barrier
-#pragma omp critical
-	{
-	    if (vmax == vbest)
-	    {
-		*xmax = xbest;
-		*ymax = ybest;
-	    }
-	}
-#else
-	*xmax = xbest;
-	*ymax = ybest;
-#endif
-    }
-
-    assert (vmax > -INFINITY);
-}
-
-void
-generate_initial_binary_pattern (pattern_t *pattern, matrix_t *matrix)
-{
-    int xcluster = 0,
-	ycluster = 0,
-	xvoid    = 0,
-	yvoid    = 0;
-
-    for (;;)
-    {
-	largest_cluster (pattern, matrix, TRUE, &xcluster, &ycluster);
-	assert (*pattern_get (pattern, xcluster, ycluster) == TRUE);
-	swap_pixel (pattern, matrix, xcluster, ycluster);
-
-	largest_cluster (pattern, matrix, FALSE, &xvoid, &yvoid);
-	assert (*pattern_get (pattern, xvoid, yvoid) == FALSE);
-	swap_pixel (pattern, matrix, xvoid, yvoid);
-
-	if (xcluster == xvoid && ycluster == yvoid)
-	    return;
-    }
-}
-
-bool_t
-generate_dither_array (array_t *array,
-		       pattern_t const *prototype, matrix_t const *matrix,
-		       pattern_t *temp_pattern, matrix_t *temp_matrix)
-{
-    int width        = prototype->width,
-	height       = prototype->height;
-
-    int x, y, rank;
-
-    int initial_rank = 0;
-
-    if (array->width != width || array->height != height)
-	return FALSE;
-
-    // Make copies of the prototype and associated sizes matrix since we will
-    // trash them
-    if (!pattern_copy (temp_pattern, prototype))
-	return FALSE;
-
-    if (!matrix_copy (temp_matrix, matrix))
-	return FALSE;
-
-    // Compute initial rank
-    for (y = 0; y < height; ++y)
-    {
-	for (x = 0; x < width; ++x)
-	{
-	    if (*pattern_get (temp_pattern, x, y))
-		initial_rank += 1;
-
-	    *array_get (array, x, y) = 0;
-	}
-    }
-
-    // Phase 1
-    for (rank = initial_rank; rank > 0; --rank)
-    {
-	largest_cluster (temp_pattern, temp_matrix, TRUE, &x, &y);
-	swap_pixel (temp_pattern, temp_matrix, x, y);
-	*array_get (array, x, y) = rank - 1;
-    }
-
-    // Make copies again for phases 2 & 3
-    if (!pattern_copy (temp_pattern, prototype))
-	return FALSE;
-
-    if (!matrix_copy (temp_matrix, matrix))
-	return FALSE;
-
-    // Phase 2 & 3
-    for (rank = initial_rank; rank < width * height; ++rank)
-    {
-	largest_cluster (temp_pattern, temp_matrix, FALSE, &x, &y);
-	swap_pixel (temp_pattern, temp_matrix, x, y);
-	*array_get (array, x, y) = rank;
-    }
-
-    return TRUE;
-}
-
-bool_t
-generate (int size, xorwow_state_t *s,
-	  char const *c_filename, char const *ppm_filename)
-{
-    bool_t ok = TRUE;
-
-    pattern_t prototype, temp_pattern;
-    array_t   array;
-    matrix_t  matrix, temp_matrix;
-
-    printf ("Generating %dx%d blue noise...\n", size, size);
-
-    if (!pattern_init (&prototype, size, size))
-	return FALSE;
-
-    if (!pattern_init (&temp_pattern, size, size))
-    {
-	pattern_destroy (&prototype);
-	return FALSE;
-    }
-
-    if (!matrix_init (&matrix, size, size))
-    {
-	pattern_destroy (&temp_pattern);
-	pattern_destroy (&prototype);
-	return FALSE;
-    }
-
-    if (!matrix_init (&temp_matrix, size, size))
-    {
-	matrix_destroy (&matrix);
-	pattern_destroy (&temp_pattern);
-	pattern_destroy (&prototype);
-	return FALSE;
-    }
-
-    if (!array_init (&array, size, size))
-    {
-	matrix_destroy (&temp_matrix);
-	matrix_destroy (&matrix);
-	pattern_destroy (&temp_pattern);
-	pattern_destroy (&prototype);
-	return FALSE;
-    }
-
-    printf("Filling initial binary pattern with white noise...\n");
-    pattern_fill_white_noise (&prototype, .1, s);
-
-    printf("Initializing cluster sizes...\n");
-    if (!compute_cluster_sizes (&prototype, &matrix))
-    {
-	fprintf (stderr, "Error while computing cluster sizes\n");
-	ok = FALSE;
-	goto out;
-    }
-
-    printf("Generating initial binary pattern...\n");
-    generate_initial_binary_pattern (&prototype, &matrix);
-
-    printf("Generating dither array...\n");
-    if (!generate_dither_array (&array, &prototype, &matrix,
-			 &temp_pattern, &temp_matrix))
-    {
-	fprintf (stderr, "Error while generating dither array\n");
-	ok = FALSE;
-	goto out;
-    }
-
-    printf("Saving dither array...\n");
-    if (!array_save (&array, c_filename))
-    {
-	fprintf (stderr, "Error saving dither array\n");
-	ok = FALSE;
-	goto out;
-    }
-
-#if SAVE_PPM
-    if (!array_save_ppm (&array, ppm_filename))
-    {
-	fprintf (stderr, "Error saving dither array PPM\n");
-	ok = FALSE;
-	goto out;
-    }
-#else
-    (void)ppm_filename;
-#endif
-
-    printf("All done!\n");
-
-out:
-    array_destroy (&array);
-    matrix_destroy (&temp_matrix);
-    matrix_destroy (&matrix);
-    pattern_destroy (&temp_pattern);
-    pattern_destroy (&prototype);
-    return ok;
-}
-
-int
-main (void)
-{
-    xorwow_state_t s = {1185956906, 12385940, 983948, 349208051, 901842};
-
-    if (!generate (64, &s, "blue-noise-64x64.h", "blue-noise-64x64.ppm"))
-	return -1;
-
-    return 0;
-}
diff --git a/vendor/pixman/pixman/loongson-mmintrin.h b/vendor/pixman/pixman/loongson-mmintrin.h
deleted file mode 100644
index 0e79e8648..000000000
--- a/vendor/pixman/pixman/loongson-mmintrin.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* The gcc-provided loongson intrinsic functions are way too fucking broken
- * to be of any use, otherwise I'd use them.
- *
- * - The hardware instructions are very similar to MMX or iwMMXt. Certainly
- *   close enough that they could have implemented the _mm_*-style intrinsic
- *   interface and had a ton of optimized code available to them. Instead they
- *   implemented something much, much worse.
- *
- * - pshuf takes a dead first argument, causing extra instructions to be
- *   generated.
- *
- * - There are no 64-bit shift or logical intrinsics, which means you have
- *   to implement them with inline assembly, but this is a nightmare because
- *   gcc doesn't understand that the integer vector datatypes are actually in
- *   floating-point registers, so you end up with braindead code like
- *
- *	punpcklwd	$f9,$f9,$f5
- *	    dmtc1	v0,$f8
- *	punpcklwd	$f19,$f19,$f5
- *	    dmfc1	t9,$f9
- *	    dmtc1	v0,$f9
- *	    dmtc1	t9,$f20
- *	    dmfc1	s0,$f19
- *	punpcklbh	$f20,$f20,$f2
- *
- *   where crap just gets copied back and forth between integer and floating-
- *   point registers ad nauseum.
- *
- * Instead of trying to workaround the problems from these crap intrinsics, I
- * just implement the _mm_* intrinsics needed for pixman-mmx.c using inline
- * assembly.
- */
-
-#include <stdint.h>
-
-/* vectors are stored in 64-bit floating-point registers */
-typedef double __m64;
-/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */
-typedef float  __m32;
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_si64 (void)
-{
-	return 0.0;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("paddh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pi32 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("paddw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pu16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("paddush %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pu8 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("paddusb %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_si64 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("and %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("pcmpeqw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_empty (void)
-{
-
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("pmaddhw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("pmulhuh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("pmullh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_si64 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("or %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_pu16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("packushb %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_pi32 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("packsswh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
- (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
-{
-	if (__builtin_constant_p (__w3) &&
-	    __builtin_constant_p (__w2) &&
-	    __builtin_constant_p (__w1) &&
-	    __builtin_constant_p (__w0))
-	{
-		uint64_t val = ((uint64_t)__w3 << 48)
-			     | ((uint64_t)__w2 << 32)
-			     | ((uint64_t)__w1 << 16)
-			     | ((uint64_t)__w0 <<  0);
-		return *(__m64 *)&val;
-	}
-	else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0)
-	{
-		/* TODO: handle other cases */
-		uint64_t val = __w3;
-		uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0);
-		__m64 ret;
-		asm("pshufh %0, %1, %2\n\t"
-		    : "=f" (ret)
-		    : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm)
-		);
-		return ret;
-	} else {
-		uint64_t val = ((uint64_t)__w3 << 48)
-			     | ((uint64_t)__w2 << 32)
-			     | ((uint64_t)__w1 << 16)
-			     | ((uint64_t)__w0 <<  0);
-		return *(__m64 *)&val;
-	}
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi32 (unsigned __i1, unsigned __i0)
-{
-	if (__builtin_constant_p (__i1) &&
-	    __builtin_constant_p (__i0))
-	{
-		uint64_t val = ((uint64_t)__i1 << 32)
-			     | ((uint64_t)__i0 <<  0);
-		return *(__m64 *)&val;
-	}
-	else if (__i1 == __i0)
-	{
-		uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0);
-		__m64 ret;
-		asm("pshufh %0, %1, %2\n\t"
-		    : "=f" (ret)
-		    : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm)
-		);
-		return ret;
-	} else {
-		uint64_t val = ((uint64_t)__i1 << 32)
-			     | ((uint64_t)__i0 <<  0);
-		return *(__m64 *)&val;
-	}
-}
-#undef _MM_SHUFFLE
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __m, int64_t __n)
-{
-	__m64 ret;
-	asm("pshufh %0, %1, %2\n\t"
-	    : "=f" (ret)
-	    : "f" (__m), "f" (*(__m64 *)&__n)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_pi16 (__m64 __m, int64_t __count)
-{
-	__m64 ret;
-	asm("psllh  %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__count)
-	);
-	return ret;
-}
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_si64 (__m64 __m, int64_t __count)
-{
-	__m64 ret;
-	asm("dsll  %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__count)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_pi16 (__m64 __m, int64_t __count)
-{
-	__m64 ret;
-	asm("psrlh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__count)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_pi32 (__m64 __m, int64_t __count)
-{
-	__m64 ret;
-	asm("psrlw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__count)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_si64 (__m64 __m, int64_t __count)
-{
-	__m64 ret;
-	asm("dsrl  %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__count)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("psubh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("punpckhbh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("punpckhhw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("punpcklbh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32 datatype which
- * allows load8888 to use 32-bit loads */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("punpcklbh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("punpcklhw %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_si64 (__m64 __m1, __m64 __m2)
-{
-	__m64 ret;
-	asm("xor %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-loongson_extract_pi16 (__m64 __m, int64_t __pos)
-{
-	__m64 ret;
-	asm("pextrh %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m), "f" (*(__m64 *)&__pos)
-	);
-	return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos)
-{
-	__m64 ret;
-	asm("pinsrh_%3 %0, %1, %2\n\t"
-	   : "=f" (ret)
-	   : "f" (__m1), "f" (__m2), "i" (__pos)
-	);
-	return ret;
-}
diff --git a/vendor/pixman/pixman/make-srgb.pl b/vendor/pixman/pixman/make-srgb.pl
deleted file mode 100644
index 8bba160cc..000000000
--- a/vendor/pixman/pixman/make-srgb.pl
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-sub linear_to_srgb
-{
-    my ($c) = @_;
-
-    if ($c < 0.0031308)
-    {
-	return $c * 12.92;
-    }
-    else
-    {
-	return 1.055 * $c ** (1.0/2.4) - 0.055;
-    }
-}
-
-sub srgb_to_linear
-{
-    my ($c) = @_;
-
-    if ($c < 0.04045)
-    {
-	return $c / 12.92;
-    }
-    else
-    {
-	return (($c + 0.055) / 1.055) ** 2.4
-    }
-}
-
-my @linear_to_srgb;
-for my $linear (0 .. 4095)
-{
-    my $srgb = int(linear_to_srgb($linear / 4095.0) * 255.0 + 0.5);
-    push @linear_to_srgb, $srgb;
-}
-
-my @srgb_to_linear;
-for my $srgb (0 .. 255)
-{
-    my $linear = int(srgb_to_linear($srgb / 255.0) * 65535.0 + 0.5);
-    push @srgb_to_linear, $linear;
-}
-
-# Ensure that we have a lossless sRGB and back conversion loop.
-# some of the darkest shades need a little bias -- maximum is just
-# 5 increments out of 16. This gives us useful property with
-# least amount of error in the sRGB-to-linear table, and keeps the actual
-# table lookup in the other direction as simple as possible.
-for my $srgb (0 .. $#srgb_to_linear)
-{
-    my $add = 0;
-    while (1)
-    {
-	my $linear = $srgb_to_linear[$srgb];
-	my $srgb_lossy = $linear_to_srgb[$linear >> 4];
-	last if $srgb == $srgb_lossy;
-
-	# Add slight bias to this component until it rounds correctly
-	$srgb_to_linear[$srgb] ++;
-	$add ++;
-    }
-    die "Too many adds at $srgb" if $add > 5;
-}
-
-print <<"PROLOG";
-/* WARNING: This file is generated by $0.
- * Please edit that file instead of this one.
- */
-
-#include <stdint.h>
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-PROLOG
-
-print "const uint8_t linear_to_srgb[" . @linear_to_srgb . "] =\n";
-print "{\n";
-for my $linear (0 .. $#linear_to_srgb)
-{
-    if (($linear % 10) == 0)
-    {
-	print "\t";
-    }
-    print sprintf("%d, ", $linear_to_srgb[$linear]);
-    if (($linear % 10) == 9)
-    {
-	print "\n";
-    }
-}
-print "\n};\n";
-print "\n";
-
-print "const uint16_t srgb_to_linear[" . @srgb_to_linear . "] =\n";
-print "{\n";
-for my $srgb (0 .. $#srgb_to_linear)
-{
-    if (($srgb % 10) == 0)
-    {
-	print "\t";
-    }
-    print sprintf("%d, ", $srgb_to_linear[$srgb]);
-    if (($srgb % 10) == 9)
-    {
-	print "\n";
-    }
-}
-print "\n};\n";
-
diff --git a/vendor/pixman/pixman/meson.build b/vendor/pixman/pixman/meson.build
deleted file mode 100644
index 62ec66bec..000000000
--- a/vendor/pixman/pixman/meson.build
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright © 2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-config_h = configure_file(
-  configuration : config,
-  output : 'pixman-config.h'
-)
-
-version_h = configure_file(
-  configuration : version_conf,
-  input : 'pixman-version.h.in',
-  output : 'pixman-version.h',
-  install_dir : join_paths(get_option('prefix'), get_option('includedir'), 'pixman-1')
-)
-
-libpixman_extra_cargs = []
-default_library = get_option('default_library')
-if default_library != 'static' and cc.has_function_attribute('dllexport')
-  libpixman_extra_cargs = ['-DPIXMAN_API=__declspec(dllexport)']
-endif
-
-pixman_simd_libs = []
-simds = [
-  # the mmx library can be compiled with mmx on x86/x86_64, iwmmxt on
-  # some arm cores, or loongson mmi on loongson mips systems. The
-  # libraries will all have the same name, "pixman-mmx", but there is
-  # no chance of more than one version being built in the same build
-  # because no system could have mmx, iwmmxt, and mmi, and it
-  # simplifies the build logic to give them the same name.
-  ['mmx', have_mmx, mmx_flags, []],
-  ['mmx', have_loongson_mmi, loongson_mmi_flags, []],
-  ['mmx', have_iwmmxt, iwmmxt_flags, []],
-
-  ['sse2', have_sse2, sse2_flags, []],
-  ['ssse3', have_ssse3, ssse3_flags, []],
-  ['vmx', have_vmx, vmx_flags, []],
-  ['arm-simd', have_armv6_simd, [],
-   ['pixman-arm-simd-asm.S', 'pixman-arm-simd-asm-scaled.S']],
-  ['arm-neon', have_neon, [],
-   ['pixman-arm-neon-asm.S', 'pixman-arm-neon-asm-bilinear.S']],
-  ['arm-neon', have_a64neon, [],
-   ['pixman-arma64-neon-asm.S', 'pixman-arma64-neon-asm-bilinear.S']],
-  ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags,
-   ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']],
-]
-
-foreach simd : simds
-  if simd[1]
-    name = 'pixman-' + simd[0]
-    pixman_simd_libs += static_library(
-      name,
-      [name + '.c', config_h, version_h, simd[3]],
-      c_args : simd[2]
-    )
-  endif
-endforeach
-
-pixman_files = files(
-  'pixman.c',
-  'pixman-access.c',
-  'pixman-access-accessors.c',
-  'pixman-bits-image.c',
-  'pixman-combine32.c',
-  'pixman-combine-float.c',
-  'pixman-conical-gradient.c',
-  'pixman-filter.c',
-  'pixman-x86.c',
-  'pixman-mips.c',
-  'pixman-arm.c',
-  'pixman-ppc.c',
-  'pixman-edge.c',
-  'pixman-edge-accessors.c',
-  'pixman-fast-path.c',
-  'pixman-glyph.c',
-  'pixman-general.c',
-  'pixman-gradient-walker.c',
-  'pixman-image.c',
-  'pixman-implementation.c',
-  'pixman-linear-gradient.c',
-  'pixman-matrix.c',
-  'pixman-noop.c',
-  'pixman-radial-gradient.c',
-  'pixman-region16.c',
-  'pixman-region32.c',
-  'pixman-solid-fill.c',
-  'pixman-timer.c',
-  'pixman-trap.c',
-  'pixman-utils.c',
-)
-
-# Android cpu-features
-cpu_features_path = get_option('cpu-features-path')
-cpu_features_sources = []
-cpu_features_inc = []
-if cpu_features_path != ''
-  message('Using cpu-features.[ch] from ' + cpu_features_path)
-  cpu_features_sources = files(
-    cpu_features_path / 'cpu-features.h',
-    cpu_features_path / 'cpu-features.c',
-  )
-  cpu_features_inc = include_directories(cpu_features_path)
-endif
-
-libpixman = library(
-  'pixman-1',
-  [pixman_files, config_h, version_h, cpu_features_sources],
-  link_with: pixman_simd_libs,
-  c_args : libpixman_extra_cargs,
-  dependencies : [dep_m, dep_threads],
-  include_directories : cpu_features_inc,
-  version : meson.project_version(),
-  install : true,
-)
-
-inc_pixman = include_directories('.')
-
-idep_pixman = declare_dependency(
-  link_with: libpixman,
-  include_directories : inc_pixman,
-)
-
-if meson.version().version_compare('>= 0.54.0')
-  meson.override_dependency('pixman-1', idep_pixman)
-endif
-
-install_headers('pixman.h', subdir : 'pixman-1')
diff --git a/vendor/pixman/pixman/pixman-access-accessors.c b/vendor/pixman/pixman/pixman-access-accessors.c
deleted file mode 100644
index 3263582f1..000000000
--- a/vendor/pixman/pixman/pixman-access-accessors.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#define PIXMAN_FB_ACCESSORS
-
-#include "pixman-access.c"
diff --git a/vendor/pixman/pixman/pixman-access.c b/vendor/pixman/pixman/pixman-access.c
deleted file mode 100644
index 892e70b73..000000000
--- a/vendor/pixman/pixman/pixman-access.c
+++ /dev/null
@@ -1,1715 +0,0 @@
-/*
- *
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *             2008 Aaron Plattner, NVIDIA Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-
-#include "pixman-accessor.h"
-#include "pixman-private.h"
-
-#define CONVERT_RGB24_TO_Y15(s)						\
-    (((((s) >> 16) & 0xff) * 153 +					\
-      (((s) >>  8) & 0xff) * 301 +					\
-      (((s)      ) & 0xff) * 58) >> 2)
-
-#define CONVERT_RGB24_TO_RGB15(s)                                       \
-    ((((s) >> 3) & 0x001f) |                                            \
-     (((s) >> 6) & 0x03e0) |                                            \
-     (((s) >> 9) & 0x7c00))
-
-/* Fetch macros */
-
-#ifdef WORDS_BIGENDIAN
-#define FETCH_1(img,l,o)						\
-    (((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1)
-#else
-#define FETCH_1(img,l,o)						\
-    ((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1)
-#endif
-
-#define FETCH_8(img,l,o)    (READ (img, (((uint8_t *)(l)) + ((o) >> 3))))
-
-#ifdef WORDS_BIGENDIAN
-#define FETCH_4(img,l,o)						\
-    (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4))
-#else
-#define FETCH_4(img,l,o)						\
-    (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf))
-#endif
-
-#ifdef WORDS_BIGENDIAN
-#define FETCH_24(img,l,o)                                              \
-    ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16)    |       \
-     (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8)     |       \
-     (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
-#else
-#define FETCH_24(img,l,o)						\
-    ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0)	|	\
-     (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8)	|	\
-     (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
-#endif
-
-/* Store macros */
-
-#ifdef WORDS_BIGENDIAN
-#define STORE_1(img,l,o,v)						\
-    do									\
-    {									\
-	uint32_t  *__d = ((uint32_t *)(l)) + ((o) >> 5);		\
-	uint32_t __m, __v;						\
-									\
-	__m = 1U << (0x1f - ((o) & 0x1f));				\
-	__v = (v)? __m : 0;						\
-									\
-	WRITE((img), __d, (READ((img), __d) & ~__m) | __v);		\
-    }									\
-    while (0)
-#else
-#define STORE_1(img,l,o,v)						\
-    do									\
-    {									\
-	uint32_t  *__d = ((uint32_t *)(l)) + ((o) >> 5);		\
-	uint32_t __m, __v;						\
-									\
-	__m = 1U << ((o) & 0x1f);					\
-	__v = (v)? __m : 0;						\
-									\
-	WRITE((img), __d, (READ((img), __d) & ~__m) | __v);		\
-    }									\
-    while (0)
-#endif
-
-#define STORE_8(img,l,o,v)  (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
-
-#ifdef WORDS_BIGENDIAN
-#define STORE_4(img,l,o,v)						\
-    do									\
-    {									\
-	int bo = 4 * (o);						\
-	int v4 = (v) & 0x0f;						\
-									\
-	STORE_8 (img, l, bo, (						\
-		     bo & 4 ?						\
-		     (FETCH_8 (img, l, bo) & 0xf0) | (v4) :		\
-		     (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4)));	\
-    } while (0)
-#else
-#define STORE_4(img,l,o,v)						\
-    do									\
-    {									\
-	int bo = 4 * (o);						\
-	int v4 = (v) & 0x0f;						\
-									\
-	STORE_8 (img, l, bo, (						\
-		     bo & 4 ?						\
-		     (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) :	\
-		     (FETCH_8 (img, l, bo) & 0xf0) | (v4)));		\
-    } while (0)
-#endif
-
-#ifdef WORDS_BIGENDIAN
-#define STORE_24(img,l,o,v)                                            \
-    do                                                                 \
-    {                                                                  \
-	uint8_t *__tmp = (l) + 3 * (o);				       \
-        							       \
-	WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16);	       \
-	WRITE ((img), __tmp++, ((v) & 0x0000ff00) >>  8);	       \
-	WRITE ((img), __tmp++, ((v) & 0x000000ff) >>  0);	       \
-    }                                                                  \
-    while (0)
-#else
-#define STORE_24(img,l,o,v)                                            \
-    do                                                                 \
-    {                                                                  \
-	uint8_t *__tmp = (l) + 3 * (o);				       \
-        							       \
-	WRITE ((img), __tmp++, ((v) & 0x000000ff) >>  0);	       \
-	WRITE ((img), __tmp++, ((v) & 0x0000ff00) >>  8);	       \
-	WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16);	       \
-    }								       \
-    while (0)
-#endif
-
-/*
- * YV12 setup and access macros
- */
-
-#define YV12_SETUP(image)                                               \
-    bits_image_t *__bits_image = (bits_image_t *)image;                 \
-    uint32_t *bits = __bits_image->bits;                                \
-    int stride = __bits_image->rowstride;                               \
-    int offset0 = stride < 0 ?                                          \
-    ((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride :	\
-    stride * __bits_image->height;					\
-    int offset1 = stride < 0 ?                                          \
-    offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) :	\
-	offset0 + (offset0 >> 2)
-
-/* Note no trailing semicolon on the above macro; if it's there, then
- * the typical usage of YV12_SETUP(image); will have an extra trailing ;
- * that some compilers will interpret as a statement -- and then any further
- * variable declarations will cause an error.
- */
-
-#define YV12_Y(line)                                                    \
-    ((uint8_t *) ((bits) + (stride) * (line)))
-
-#define YV12_U(line)                                                    \
-    ((uint8_t *) ((bits) + offset1 +                                    \
-                  ((stride) >> 1) * ((line) >> 1)))
-
-#define YV12_V(line)                                                    \
-    ((uint8_t *) ((bits) + offset0 +                                    \
-                  ((stride) >> 1) * ((line) >> 1)))
-
-/* Misc. helpers */
-
-static force_inline void
-get_shifts (pixman_format_code_t  format,
-	    int			 *a,
-	    int			 *r,
-	    int                  *g,
-	    int                  *b)
-{
-    switch (PIXMAN_FORMAT_TYPE (format))
-    {
-    case PIXMAN_TYPE_A:
-	*b = 0;
-	*g = 0;
-	*r = 0;
-	*a = 0;
-	break;
-
-    case PIXMAN_TYPE_ARGB:
-    case PIXMAN_TYPE_ARGB_SRGB:
-	*b = 0;
-	*g = *b + PIXMAN_FORMAT_B (format);
-	*r = *g + PIXMAN_FORMAT_G (format);
-	*a = *r + PIXMAN_FORMAT_R (format);
-	break;
-
-    case PIXMAN_TYPE_ABGR:
-	*r = 0;
-	*g = *r + PIXMAN_FORMAT_R (format);
-	*b = *g + PIXMAN_FORMAT_G (format);
-	*a = *b + PIXMAN_FORMAT_B (format);
-	break;
-
-    case PIXMAN_TYPE_BGRA:
-	/* With BGRA formats we start counting at the high end of the pixel */
-	*b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format);
-	*g = *b - PIXMAN_FORMAT_B (format);
-	*r = *g - PIXMAN_FORMAT_G (format);
-	*a = *r - PIXMAN_FORMAT_R (format);
-	break;
-
-    case PIXMAN_TYPE_RGBA:
-	/* With BGRA formats we start counting at the high end of the pixel */
-	*r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format);
-	*g = *r - PIXMAN_FORMAT_R (format);
-	*b = *g - PIXMAN_FORMAT_G (format);
-	*a = *b - PIXMAN_FORMAT_B (format);
-	break;
-
-    default:
-	assert (0);
-	break;
-    }
-}
-
-static force_inline uint32_t
-convert_channel (uint32_t pixel, uint32_t def_value,
-		 int n_from_bits, int from_shift,
-		 int n_to_bits, int to_shift)
-{
-    uint32_t v;
-
-    if (n_from_bits && n_to_bits)
-	v  = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits);
-    else if (n_to_bits)
-	v = def_value;
-    else
-	v = 0;
-
-    return (v & ((1 << n_to_bits) - 1)) << to_shift;
-}
-
-static force_inline uint32_t
-convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel)
-{
-    int a_from_shift, r_from_shift, g_from_shift, b_from_shift;
-    int a_to_shift, r_to_shift, g_to_shift, b_to_shift;
-    uint32_t a, r, g, b;
-
-    get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift);
-    get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift);
-
-    a = convert_channel (pixel, ~0,
-			 PIXMAN_FORMAT_A (from), a_from_shift,
-			 PIXMAN_FORMAT_A (to), a_to_shift);
-
-    r = convert_channel (pixel, 0,
-			 PIXMAN_FORMAT_R (from), r_from_shift,
-			 PIXMAN_FORMAT_R (to), r_to_shift);
-
-    g = convert_channel (pixel, 0,
-			 PIXMAN_FORMAT_G (from), g_from_shift,
-			 PIXMAN_FORMAT_G (to), g_to_shift);
-
-    b = convert_channel (pixel, 0,
-			 PIXMAN_FORMAT_B (from), b_from_shift,
-			 PIXMAN_FORMAT_B (to), b_to_shift);
-
-    return a | r | g | b;
-}
-
-static force_inline uint32_t
-convert_pixel_to_a8r8g8b8 (bits_image_t *image,
-			   pixman_format_code_t format,
-			   uint32_t pixel)
-{
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY		||
-	PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
-    {
-	return image->indexed->rgba[pixel];
-    }
-    else
-    {
-	return convert_pixel (format, PIXMAN_a8r8g8b8, pixel);
-    }
-}
-
-static force_inline uint32_t
-convert_pixel_from_a8r8g8b8 (pixman_image_t *image,
-			     pixman_format_code_t format, uint32_t pixel)
-{
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
-    {
-	pixel = CONVERT_RGB24_TO_Y15 (pixel);
-
-	return image->bits.indexed->ent[pixel & 0x7fff];
-    }
-    else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
-    {
-	pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel);
-
-	return image->bits.indexed->ent[pixel & 0x7fff];
-    }
-    else
-    {
-	return convert_pixel (PIXMAN_a8r8g8b8, format, pixel);
-    }
-}
-
-static force_inline uint32_t
-fetch_and_convert_pixel (bits_image_t *		image,
-			 const uint8_t *	bits,
-			 int			offset,
-			 pixman_format_code_t	format)
-{
-    uint32_t pixel;
-
-    switch (PIXMAN_FORMAT_BPP (format))
-    {
-    case 1:
-	pixel = FETCH_1 (image, bits, offset);
-	break;
-
-    case 4:
-	pixel = FETCH_4 (image, bits, offset);
-	break;
-
-    case 8:
-	pixel = READ (image, bits + offset);
-	break;
-
-    case 16:
-	pixel = READ (image, ((uint16_t *)bits + offset));
-	break;
-
-    case 24:
-	pixel = FETCH_24 (image, bits, offset);
-	break;
-
-    case 32:
-	pixel = READ (image, ((uint32_t *)bits + offset));
-	break;
-
-    default:
-	pixel = 0xffff00ff; /* As ugly as possible to detect the bug */
-	break;
-    }
-
-    return convert_pixel_to_a8r8g8b8 (image, format, pixel);
-}
-
-static force_inline void
-convert_and_store_pixel (bits_image_t *		image,
-			 uint8_t *		dest,
-			 int                    offset,
-			 pixman_format_code_t	format,
-			 uint32_t		pixel)
-{
-    uint32_t converted = convert_pixel_from_a8r8g8b8 (
-	(pixman_image_t *)image, format, pixel);
-
-    switch (PIXMAN_FORMAT_BPP (format))
-    {
-    case 1:
-	STORE_1 (image, dest, offset, converted & 0x01);
-	break;
-
-    case 4:
-	STORE_4 (image, dest, offset, converted & 0xf);
-	break;
-
-    case 8:
-	WRITE (image, (dest + offset), converted & 0xff);
-	break;
-
-    case 16:
-	WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff);
-	break;
-
-    case 24:
-	STORE_24 (image, dest, offset, converted);
-	break;
-
-    case 32:
-	WRITE (image, ((uint32_t *)dest + offset), converted);
-	break;
-
-    default:
-	*dest = 0x0;
-	break;
-    }
-}
-
-#define MAKE_ACCESSORS(format)						\
-    static void								\
-    fetch_scanline_ ## format (bits_image_t *image,			\
-			       int	       x,			\
-			       int             y,			\
-			       int             width,			\
-			       uint32_t *      buffer,			\
-			       const uint32_t *mask)			\
-    {									\
-	uint8_t *bits =							\
-	    (uint8_t *)(image->bits + y * image->rowstride);		\
-	int i;								\
-									\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    *buffer++ =							\
-		fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \
-	}								\
-    }									\
-									\
-    static void								\
-    store_scanline_ ## format (bits_image_t *  image,			\
-			       int             x,			\
-			       int             y,			\
-			       int             width,			\
-			       const uint32_t *values)			\
-    {									\
-	uint8_t *dest =							\
-	    (uint8_t *)(image->bits + y * image->rowstride);		\
-	int i;								\
-									\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    convert_and_store_pixel (					\
-		image, dest, i + x, PIXMAN_ ## format, values[i]);	\
-	}								\
-    }									\
-									\
-    static uint32_t							\
-    fetch_pixel_ ## format (bits_image_t *image,			\
-			    int		offset,				\
-			    int		line)				\
-    {									\
-	uint8_t *bits =							\
-	    (uint8_t *)(image->bits + line * image->rowstride);		\
-									\
-	return fetch_and_convert_pixel (				\
-	    image, bits, offset, PIXMAN_ ## format);			\
-    }									\
-									\
-    static const void *const __dummy__ ## format
-
-MAKE_ACCESSORS(a8r8g8b8);
-MAKE_ACCESSORS(x8r8g8b8);
-MAKE_ACCESSORS(a8b8g8r8);
-MAKE_ACCESSORS(x8b8g8r8);
-MAKE_ACCESSORS(x14r6g6b6);
-MAKE_ACCESSORS(b8g8r8a8);
-MAKE_ACCESSORS(b8g8r8x8);
-MAKE_ACCESSORS(r8g8b8x8);
-MAKE_ACCESSORS(r8g8b8a8);
-MAKE_ACCESSORS(r8g8b8);
-MAKE_ACCESSORS(b8g8r8);
-MAKE_ACCESSORS(r5g6b5);
-MAKE_ACCESSORS(b5g6r5);
-MAKE_ACCESSORS(a1r5g5b5);
-MAKE_ACCESSORS(x1r5g5b5);
-MAKE_ACCESSORS(a1b5g5r5);
-MAKE_ACCESSORS(x1b5g5r5);
-MAKE_ACCESSORS(a4r4g4b4);
-MAKE_ACCESSORS(x4r4g4b4);
-MAKE_ACCESSORS(a4b4g4r4);
-MAKE_ACCESSORS(x4b4g4r4);
-MAKE_ACCESSORS(a8);
-MAKE_ACCESSORS(c8);
-MAKE_ACCESSORS(g8);
-MAKE_ACCESSORS(r3g3b2);
-MAKE_ACCESSORS(b2g3r3);
-MAKE_ACCESSORS(a2r2g2b2);
-MAKE_ACCESSORS(a2b2g2r2);
-MAKE_ACCESSORS(x4a4);
-MAKE_ACCESSORS(a4);
-MAKE_ACCESSORS(g4);
-MAKE_ACCESSORS(c4);
-MAKE_ACCESSORS(r1g2b1);
-MAKE_ACCESSORS(b1g2r1);
-MAKE_ACCESSORS(a1r1g1b1);
-MAKE_ACCESSORS(a1b1g1r1);
-MAKE_ACCESSORS(a1);
-MAKE_ACCESSORS(g1);
-
-/********************************** Fetch ************************************/
-/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
- * floating point numbers. We assume that single precision
- * floating point follows the IEEE 754 format.
- */
-static const uint32_t to_linear_u[256] =
-{
-    0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61,
-    0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a,
-    0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d,
-    0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
-    0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43,
-    0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e,
-    0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601,
-    0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9,
-    0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae,
-    0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380,
-    0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466,
-    0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65,
-    0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48,
-    0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728,
-    0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4,
-    0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16,
-    0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f,
-    0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50,
-    0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a,
-    0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702,
-    0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027,
-    0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf,
-    0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0,
-    0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281,
-    0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0,
-    0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a,
-    0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15,
-    0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14,
-    0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508,
-    0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64,
-    0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594,
-    0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
-    0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65,
-    0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237,
-    0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c,
-    0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680,
-    0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24,
-    0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e,
-    0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8,
-    0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de,
-    0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6,
-    0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae,
-    0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000
-};
-
-static const float * const to_linear = (const float *)to_linear_u;
-
-static uint8_t
-to_srgb (float f)
-{
-    uint8_t low = 0;
-    uint8_t high = 255;
-
-    while (high - low > 1)
-    {
-	uint8_t mid = (low + high) / 2;
-
-	if (to_linear[mid] > f)
-	    high = mid;
-	else
-	    low = mid;
-    }
-
-    if (to_linear[high] - f < f - to_linear[low])
-	return high;
-    else
-	return low;
-}
-
-static void
-fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t *  image,
-				    int             x,
-				    int             y,
-				    int             width,
-				    uint32_t *      b,
-				    const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = bits + x;
-    const uint32_t *end = pixel + width;
-    argb_t *buffer = (argb_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	argb_t *argb = buffer;
-
-	argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
-
-	argb->r = to_linear [(p >> 16) & 0xff];
-	argb->g = to_linear [(p >>  8) & 0xff];
-	argb->b = to_linear [(p >>  0) & 0xff];
-
-	buffer++;
-    }
-}
-
-static void
-fetch_scanline_r8g8b8_sRGB_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t *      b,
-				  const uint32_t *mask)
-{
-    const uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride);
-    argb_t *buffer = (argb_t *)b;
-    int i;
-    for (i = x; i < width; ++i)
-    {
-	uint32_t p = FETCH_24 (image, bits, i);
-	argb_t *argb = buffer;
-
-	argb->a = 1.0f;
-
-	argb->r = to_linear[(p >> 16) & 0xff];
-	argb->g = to_linear[(p >>  8) & 0xff];
-	argb->b = to_linear[(p >>  0) & 0xff];
-
-	buffer++;
-    }
-}
-
-/* Expects a float buffer */
-static void
-fetch_scanline_a2r10g10b10_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t *      b,
-				  const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = bits + x;
-    const uint32_t *end = pixel + width;
-    argb_t *buffer = (argb_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t a = p >> 30;
-	uint64_t r = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t b = p & 0x3ff;
-
-	buffer->a = pixman_unorm_to_float (a, 2);
-	buffer->r = pixman_unorm_to_float (r, 10);
-	buffer->g = pixman_unorm_to_float (g, 10);
-	buffer->b = pixman_unorm_to_float (b, 10);
-
-	buffer++;
-    }
-}
-
-/* Expects a float buffer */
-#ifndef PIXMAN_FB_ACCESSORS
-static void
-fetch_scanline_rgbf_float (bits_image_t   *image,
-			   int             x,
-			   int             y,
-			   int             width,
-			   uint32_t *      b,
-			   const uint32_t *mask)
-{
-    const float *bits = (float *)image->bits + y * image->rowstride;
-    const float *pixel = bits + x * 3;
-    argb_t *buffer = (argb_t *)b;
-
-    for (; width--; buffer++) {
-	buffer->r = *pixel++;
-	buffer->g = *pixel++;
-	buffer->b = *pixel++;
-	buffer->a = 1.f;
-    }
-}
-
-static void
-fetch_scanline_rgbaf_float (bits_image_t   *image,
-			    int             x,
-			    int             y,
-			    int             width,
-			    uint32_t *      b,
-			    const uint32_t *mask)
-{
-    const float *bits = (float *)image->bits + y * image->rowstride;
-    const float *pixel = bits + x * 4;
-    argb_t *buffer = (argb_t *)b;
-
-    for (; width--; buffer++) {
-	buffer->r = *pixel++;
-	buffer->g = *pixel++;
-	buffer->b = *pixel++;
-	buffer->a = *pixel++;
-    }
-}
-#endif
-
-static void
-fetch_scanline_x2r10g10b10_float (bits_image_t   *image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t *      b,
-				  const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    argb_t *buffer = (argb_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t r = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t b = p & 0x3ff;
-
-	buffer->a = 1.0;
-	buffer->r = pixman_unorm_to_float (r, 10);
-	buffer->g = pixman_unorm_to_float (g, 10);
-	buffer->b = pixman_unorm_to_float (b, 10);
-
-	buffer++;
-    }
-}
-
-/* Expects a float buffer */
-static void
-fetch_scanline_a2b10g10r10_float (bits_image_t   *image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t *      b,
-				  const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = bits + x;
-    const uint32_t *end = pixel + width;
-    argb_t *buffer = (argb_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t a = p >> 30;
-	uint64_t b = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t r = p & 0x3ff;
-
-	buffer->a = pixman_unorm_to_float (a, 2);
-	buffer->r = pixman_unorm_to_float (r, 10);
-	buffer->g = pixman_unorm_to_float (g, 10);
-	buffer->b = pixman_unorm_to_float (b, 10);
-
-	buffer++;
-    }
-}
-
-/* Expects a float buffer */
-static void
-fetch_scanline_x2b10g10r10_float (bits_image_t   *image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t *      b,
-				  const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    argb_t *buffer = (argb_t *)b;
-
-    while (pixel < end)
-    {
-	uint32_t p = READ (image, pixel++);
-	uint64_t b = (p >> 20) & 0x3ff;
-	uint64_t g = (p >> 10) & 0x3ff;
-	uint64_t r = p & 0x3ff;
-
-	buffer->a = 1.0;
-	buffer->r = pixman_unorm_to_float (r, 10);
-	buffer->g = pixman_unorm_to_float (g, 10);
-	buffer->b = pixman_unorm_to_float (b, 10);
-
-	buffer++;
-    }
-}
-
-static void
-fetch_scanline_yuy2 (bits_image_t   *image,
-                     int             x,
-                     int             line,
-                     int             width,
-                     uint32_t *      buffer,
-                     const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + image->rowstride * line;
-    int i;
-    
-    for (i = 0; i < width; i++)
-    {
-	int16_t y, u, v;
-	int32_t r, g, b;
-	
-	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
-	u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128;
-	v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128;
-	
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-	
-	*buffer++ = 0xff000000 |
-	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-    }
-}
-
-static void
-fetch_scanline_yv12 (bits_image_t   *image,
-                     int             x,
-                     int             line,
-                     int             width,
-                     uint32_t *      buffer,
-                     const uint32_t *mask)
-{
-    YV12_SETUP (image);
-    uint8_t *y_line = YV12_Y (line);
-    uint8_t *u_line = YV12_U (line);
-    uint8_t *v_line = YV12_V (line);
-    int i;
-    
-    for (i = 0; i < width; i++)
-    {
-	int16_t y, u, v;
-	int32_t r, g, b;
-
-	y = y_line[x + i] - 16;
-	u = u_line[(x + i) >> 1] - 128;
-	v = v_line[(x + i) >> 1] - 128;
-
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-
-	*buffer++ = 0xff000000 |
-	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-    }
-}
-
-/**************************** Pixel wise fetching *****************************/
-
-#ifndef PIXMAN_FB_ACCESSORS
-static argb_t
-fetch_pixel_rgbf_float (bits_image_t *image,
-			int	    offset,
-			int	    line)
-{
-    float *bits = (float *)image->bits + line * image->rowstride;
-    argb_t argb;
-
-    argb.r = bits[offset * 3];
-    argb.g = bits[offset * 3 + 1];
-    argb.b = bits[offset * 3 + 2];
-    argb.a = 1.f;
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_rgbaf_float (bits_image_t *image,
-			 int	    offset,
-			 int	    line)
-{
-    float *bits = (float *)image->bits + line * image->rowstride;
-    argb_t argb;
-
-    argb.r = bits[offset * 4];
-    argb.g = bits[offset * 4 + 1];
-    argb.b = bits[offset * 4 + 2];
-    argb.a = bits[offset * 4 + 3];
-
-    return argb;
-}
-#endif
-
-static argb_t
-fetch_pixel_x2r10g10b10_float (bits_image_t *image,
-			       int	   offset,
-			       int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t r = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t b = p & 0x3ff;
-    argb_t argb;
-
-    argb.a = 1.0;
-    argb.r = pixman_unorm_to_float (r, 10);
-    argb.g = pixman_unorm_to_float (g, 10);
-    argb.b = pixman_unorm_to_float (b, 10);
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_a2r10g10b10_float (bits_image_t *image,
-			       int	     offset,
-			       int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t a = p >> 30;
-    uint64_t r = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t b = p & 0x3ff;
-    argb_t argb;
-
-    argb.a = pixman_unorm_to_float (a, 2);
-    argb.r = pixman_unorm_to_float (r, 10);
-    argb.g = pixman_unorm_to_float (g, 10);
-    argb.b = pixman_unorm_to_float (b, 10);
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_a2b10g10r10_float (bits_image_t *image,
-			       int           offset,
-			       int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t a = p >> 30;
-    uint64_t b = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t r = p & 0x3ff;
-    argb_t argb;
-
-    argb.a = pixman_unorm_to_float (a, 2);
-    argb.r = pixman_unorm_to_float (r, 10);
-    argb.g = pixman_unorm_to_float (g, 10);
-    argb.b = pixman_unorm_to_float (b, 10);
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_x2b10g10r10_float (bits_image_t *image,
-			       int           offset,
-			       int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    uint64_t b = (p >> 20) & 0x3ff;
-    uint64_t g = (p >> 10) & 0x3ff;
-    uint64_t r = p & 0x3ff;
-    argb_t argb;
-
-    argb.a = 1.0;
-    argb.r = pixman_unorm_to_float (r, 10);
-    argb.g = pixman_unorm_to_float (g, 10);
-    argb.b = pixman_unorm_to_float (b, 10);
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image,
-				 int	       offset,
-				 int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t p = READ (image, bits + offset);
-    argb_t argb;
-
-    argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
-
-    argb.r = to_linear [(p >> 16) & 0xff];
-    argb.g = to_linear [(p >>  8) & 0xff];
-    argb.b = to_linear [(p >>  0) & 0xff];
-
-    return argb;
-}
-
-static argb_t
-fetch_pixel_r8g8b8_sRGB_float (bits_image_t *image,
-			       int	     offset,
-			       int           line)
-{
-    uint8_t *bits = (uint8_t *)(image->bits + line * image->rowstride);
-    uint32_t p = FETCH_24 (image, bits, offset);
-    argb_t argb;
-
-    argb.a = 1.0f;
-
-    argb.r = to_linear[(p >> 16) & 0xff];
-    argb.g = to_linear[(p >>  8) & 0xff];
-    argb.b = to_linear[(p >>  0) & 0xff];
-
-    return argb;
-}
-
-static uint32_t
-fetch_pixel_yuy2 (bits_image_t *image,
-		  int           offset,
-		  int           line)
-{
-    const uint32_t *bits = image->bits + image->rowstride * line;
-    
-    int16_t y, u, v;
-    int32_t r, g, b;
-    
-    y = ((uint8_t *) bits)[offset << 1] - 16;
-    u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128;
-    v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128;
-    
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-    
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static uint32_t
-fetch_pixel_yv12 (bits_image_t *image,
-		  int           offset,
-		  int           line)
-{
-    YV12_SETUP (image);
-    int16_t y = YV12_Y (line)[offset] - 16;
-    int16_t u = YV12_U (line)[offset >> 1] - 128;
-    int16_t v = YV12_V (line)[offset >> 1] - 128;
-    int32_t r, g, b;
-    
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-    
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-/*********************************** Store ************************************/
-
-#ifndef PIXMAN_FB_ACCESSORS
-static void
-store_scanline_rgbaf_float (bits_image_t *  image,
-			    int             x,
-			    int             y,
-			    int             width,
-			    const uint32_t *v)
-{
-    float *bits = (float *)image->bits + image->rowstride * y + 4 * x;
-    const argb_t *values = (argb_t *)v;
-
-    for (; width; width--, values++)
-    {
-	*bits++ = values->r;
-	*bits++ = values->g;
-	*bits++ = values->b;
-	*bits++ = values->a;
-    }
-}
-
-static void
-store_scanline_rgbf_float (bits_image_t *  image,
-			   int             x,
-			   int             y,
-			   int             width,
-			   const uint32_t *v)
-{
-    float *bits = (float *)image->bits + image->rowstride * y + 3 * x;
-    const argb_t *values = (argb_t *)v;
-
-    for (; width; width--, values++)
-    {
-	*bits++ = values->r;
-	*bits++ = values->g;
-	*bits++ = values->b;
-    }
-}
-#endif
-
-static void
-store_scanline_a2r10g10b10_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	a = pixman_float_to_unorm (values[i].a, 2);
-	r = pixman_float_to_unorm (values[i].r, 10);
-	g = pixman_float_to_unorm (values[i].g, 10);
-	b = pixman_float_to_unorm (values[i].b, 10);
-
-	WRITE (image, pixel++,
-	       (a << 30) | (r << 20) | (g << 10) | b);
-    }
-}
-
-static void
-store_scanline_x2r10g10b10_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t r, g, b;
-
-	r = pixman_float_to_unorm (values[i].r, 10);
-	g = pixman_float_to_unorm (values[i].g, 10);
-	b = pixman_float_to_unorm (values[i].b, 10);
-
-	WRITE (image, pixel++,
-	       (r << 20) | (g << 10) | b);
-    }
-}
-
-static void
-store_scanline_a2b10g10r10_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	a = pixman_float_to_unorm (values[i].a, 2);
-	r = pixman_float_to_unorm (values[i].r, 10);
-	g = pixman_float_to_unorm (values[i].g, 10);
-	b = pixman_float_to_unorm (values[i].b, 10);
-
-	WRITE (image, pixel++,
-	       (a << 30) | (b << 20) | (g << 10) | r);
-    }
-}
-
-static void
-store_scanline_x2b10g10r10_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t r, g, b;
-
-	r = pixman_float_to_unorm (values[i].r, 10);
-	g = pixman_float_to_unorm (values[i].g, 10);
-	b = pixman_float_to_unorm (values[i].b, 10);
-
-	WRITE (image, pixel++,
-	       (b << 20) | (g << 10) | r);
-    }
-}
-
-static void
-store_scanline_a8r8g8b8_sRGB_float (bits_image_t *  image,
-				    int             x,
-				    int             y,
-				    int             width,
-				    const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint32_t *pixel = bits + x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	a = pixman_float_to_unorm (values[i].a, 8);
-	r = to_srgb (values[i].r);
-	g = to_srgb (values[i].g);
-	b = to_srgb (values[i].b);
-
-	WRITE (image, pixel++,
-	       (a << 24) | (r << 16) | (g << 8) | b);
-    }
-}
-
-static void
-store_scanline_r8g8b8_sRGB_float (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *v)
-{
-    uint8_t *bits = (uint8_t *)(image->bits + image->rowstride * y) + 3 * x;
-    argb_t *values = (argb_t *)v;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t r, g, b, rgb;
-
-	r = to_srgb (values[i].r);
-	g = to_srgb (values[i].g);
-	b = to_srgb (values[i].b);
-
-	rgb = (r << 16) | (g << 8) | b;
-
-	STORE_24 (image, bits, i, rgb);
-    }
-}
-
-/*
- * Contracts a floating point image to 32bpp and then stores it using a
- * regular 32-bit store proc. Despite the type, this function expects an
- * argb_t buffer.
- */
-static void
-store_scanline_generic_float (bits_image_t *  image,
-			      int             x,
-			      int             y,
-			      int             width,
-			      const uint32_t *values)
-{
-    uint32_t *argb8_pixels;
-
-    assert (image->common.type == BITS);
-
-    argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
-    if (!argb8_pixels)
-	return;
-
-    /* Contract the scanline.  We could do this in place if values weren't
-     * const.
-     */
-    pixman_contract_from_float (argb8_pixels, (argb_t *)values, width);
-
-    image->store_scanline_32 (image, x, y, width, argb8_pixels);
-
-    free (argb8_pixels);
-}
-
-static void
-fetch_scanline_generic_float (bits_image_t *  image,
-			      int	      x,
-			      int	      y,
-			      int	      width,
-			      uint32_t *      buffer,
-			      const uint32_t *mask)
-{
-    image->fetch_scanline_32 (image, x, y, width, buffer, NULL);
-
-    pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width);
-}
-
-/* The 32_sRGB paths should be deleted after narrow processing
- * is no longer invoked for formats that are considered wide.
- * (Also see fetch_pixel_generic_lossy_32) */
-static void
-fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 uint32_t       *buffer,
-                                 const uint32_t *mask)
-{
-    const uint32_t *bits = image->bits + y * image->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    uint32_t tmp;
-    
-    while (pixel < end)
-    {
-	uint32_t a, r, g, b;
-
-	tmp = READ (image, pixel++);
-
-	a = (tmp >> 24) & 0xff;
-	r = (tmp >> 16) & 0xff;
-	g = (tmp >> 8) & 0xff;
-	b = (tmp >> 0) & 0xff;
-
-	r = to_linear[r] * 255.0f + 0.5f;
-	g = to_linear[g] * 255.0f + 0.5f;
-	b = to_linear[b] * 255.0f + 0.5f;
-
-	*buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0);
-    }
-}
-
-static void
-fetch_scanline_r8g8b8_32_sRGB (bits_image_t   *image,
-                               int             x,
-                               int             y,
-                               int             width,
-                               uint32_t       *buffer,
-                               const uint32_t *mask)
-{
-    const uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride) + 3 * x;
-    uint32_t tmp;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	tmp = FETCH_24 (image, bits, i);
-
-	a = 0xff;
-	r = (tmp >> 16) & 0xff;
-	g = (tmp >> 8) & 0xff;
-	b = (tmp >> 0) & 0xff;
-
-	r = to_linear[r] * 255.0f + 0.5f;
-	g = to_linear[g] * 255.0f + 0.5f;
-	b = to_linear[b] * 255.0f + 0.5f;
-
-	*buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0);
-    }
-}
-
-static uint32_t
-fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
-			      int           offset,
-			      int           line)
-{
-    uint32_t *bits = image->bits + line * image->rowstride;
-    uint32_t tmp = READ (image, bits + offset);
-    uint32_t a, r, g, b;
-
-    a = (tmp >> 24) & 0xff;
-    r = (tmp >> 16) & 0xff;
-    g = (tmp >> 8) & 0xff;
-    b = (tmp >> 0) & 0xff;
-
-    r = to_linear[r] * 255.0f + 0.5f;
-    g = to_linear[g] * 255.0f + 0.5f;
-    b = to_linear[b] * 255.0f + 0.5f;
-
-    return (a << 24) | (r << 16) | (g << 8) | (b << 0);
-}
-
-static uint32_t
-fetch_pixel_r8g8b8_32_sRGB (bits_image_t *image,
-			    int           offset,
-			    int           line)
-{
-    uint8_t *bits = (uint8_t *)(image->bits + line * image->rowstride);
-    uint32_t tmp = FETCH_24 (image, bits, offset);
-    uint32_t a, r, g, b;
-
-    a = 0xff;
-    r = (tmp >> 16) & 0xff;
-    g = (tmp >> 8) & 0xff;
-    b = (tmp >> 0) & 0xff;
-
-    r = to_linear[r] * 255.0f + 0.5f;
-    g = to_linear[g] * 255.0f + 0.5f;
-    b = to_linear[b] * 255.0f + 0.5f;
-
-    return (a << 24) | (r << 16) | (g << 8) | (b << 0);
-}
-
-static void
-store_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *v)
-{
-    uint32_t *bits = image->bits + image->rowstride * y;
-    uint64_t *values = (uint64_t *)v;
-    uint32_t *pixel = bits + x;
-    uint64_t tmp;
-    int i;
-    
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	tmp = values[i];
-
-	a = (tmp >> 24) & 0xff;
-	r = (tmp >> 16) & 0xff;
-	g = (tmp >> 8) & 0xff;
-	b = (tmp >> 0) & 0xff;
-
-	r = to_srgb (r * (1/255.0f));
-	g = to_srgb (g * (1/255.0f));
-	b = to_srgb (b * (1/255.0f));
-	
-	WRITE (image, pixel++, a | (r << 16) | (g << 8) | (b << 0));
-    }
-}
-
-static void
-store_scanline_r8g8b8_32_sRGB (bits_image_t   *image,
-			       int             x,
-                               int             y,
-                               int             width,
-                               const uint32_t *v)
-{
-    uint8_t *bits = (uint8_t *)(image->bits + image->rowstride * y) + 3 * x;
-    uint64_t *values = (uint64_t *)v;
-    uint64_t tmp;
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t r, g, b;
-
-	tmp = values[i];
-
-	r = (tmp >> 16) & 0xff;
-	g = (tmp >> 8) & 0xff;
-	b = (tmp >> 0) & 0xff;
-
-	r = to_srgb (r * (1/255.0f));
-	g = to_srgb (g * (1/255.0f));
-	b = to_srgb (b * (1/255.0f));
-
-	STORE_24 (image, bits, i, (r << 16) | (g << 8) | (b << 0));
-    }
-}
-
-static argb_t
-fetch_pixel_generic_float (bits_image_t *image,
-			   int		 offset,
-			   int           line)
-{
-    uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
-    argb_t f;
-
-    pixman_expand_to_float (&f, &pixel32, image->format, 1);
-
-    return f;
-}
-
-/*
- * XXX: The transformed fetch path only works at 32-bpp so far.  When all
- * paths have wide versions, this can be removed.
- *
- * WARNING: This function loses precision!
- */
-static uint32_t
-fetch_pixel_generic_lossy_32 (bits_image_t *image,
-			      int           offset,
-			      int           line)
-{
-    argb_t pixel64 = image->fetch_pixel_float (image, offset, line);
-    uint32_t result;
-
-    pixman_contract_from_float (&result, &pixel64, 1);
-
-    return result;
-}
-
-typedef struct
-{
-    pixman_format_code_t	format;
-    fetch_scanline_t		fetch_scanline_32;
-    fetch_scanline_t		fetch_scanline_float;
-    fetch_pixel_32_t		fetch_pixel_32;
-    fetch_pixel_float_t		fetch_pixel_float;
-    store_scanline_t		store_scanline_32;
-    store_scanline_t		store_scanline_float;
-} format_info_t;
-
-#define FORMAT_INFO(format) 						\
-    {									\
-	PIXMAN_ ## format,						\
-	    fetch_scanline_ ## format,					\
-	    fetch_scanline_generic_float,				\
-	    fetch_pixel_ ## format,					\
-	    fetch_pixel_generic_float,					\
-	    store_scanline_ ## format,					\
-	    store_scanline_generic_float				\
-    }
-
-static const format_info_t accessors[] =
-{
-/* 32 bpp formats */
-    FORMAT_INFO (a8r8g8b8),
-    FORMAT_INFO (x8r8g8b8),
-    FORMAT_INFO (a8b8g8r8),
-    FORMAT_INFO (x8b8g8r8),
-    FORMAT_INFO (b8g8r8a8),
-    FORMAT_INFO (b8g8r8x8),
-    FORMAT_INFO (r8g8b8a8),
-    FORMAT_INFO (r8g8b8x8),
-    FORMAT_INFO (x14r6g6b6),
-
-/* sRGB formats */
-  { PIXMAN_a8r8g8b8_sRGB,
-    fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
-    fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
-    store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
-  },
-  { PIXMAN_r8g8b8_sRGB,
-    fetch_scanline_r8g8b8_32_sRGB, fetch_scanline_r8g8b8_sRGB_float,
-    fetch_pixel_r8g8b8_32_sRGB, fetch_pixel_r8g8b8_sRGB_float,
-    store_scanline_r8g8b8_32_sRGB, store_scanline_r8g8b8_sRGB_float,
-  },
-
-/* 24bpp formats */
-    FORMAT_INFO (r8g8b8),
-    FORMAT_INFO (b8g8r8),
-    
-/* 16bpp formats */
-    FORMAT_INFO (r5g6b5),
-    FORMAT_INFO (b5g6r5),
-    
-    FORMAT_INFO (a1r5g5b5),
-    FORMAT_INFO (x1r5g5b5),
-    FORMAT_INFO (a1b5g5r5),
-    FORMAT_INFO (x1b5g5r5),
-    FORMAT_INFO (a4r4g4b4),
-    FORMAT_INFO (x4r4g4b4),
-    FORMAT_INFO (a4b4g4r4),
-    FORMAT_INFO (x4b4g4r4),
-    
-/* 8bpp formats */
-    FORMAT_INFO (a8),
-    FORMAT_INFO (r3g3b2),
-    FORMAT_INFO (b2g3r3),
-    FORMAT_INFO (a2r2g2b2),
-    FORMAT_INFO (a2b2g2r2),
-    
-    FORMAT_INFO (c8),
-    
-    FORMAT_INFO (g8),
-    
-#define fetch_scanline_x4c4 fetch_scanline_c8
-#define fetch_pixel_x4c4 fetch_pixel_c8
-#define store_scanline_x4c4 store_scanline_c8
-    FORMAT_INFO (x4c4),
-    
-#define fetch_scanline_x4g4 fetch_scanline_g8
-#define fetch_pixel_x4g4 fetch_pixel_g8
-#define store_scanline_x4g4 store_scanline_g8
-    FORMAT_INFO (x4g4),
-    
-    FORMAT_INFO (x4a4),
-    
-/* 4bpp formats */
-    FORMAT_INFO (a4),
-    FORMAT_INFO (r1g2b1),
-    FORMAT_INFO (b1g2r1),
-    FORMAT_INFO (a1r1g1b1),
-    FORMAT_INFO (a1b1g1r1),
-    
-    FORMAT_INFO (c4),
-    
-    FORMAT_INFO (g4),
-    
-/* 1bpp formats */
-    FORMAT_INFO (a1),
-    FORMAT_INFO (g1),
-    
-/* Wide formats */
-#ifndef PIXMAN_FB_ACCESSORS
-    { PIXMAN_rgba_float,
-      NULL, fetch_scanline_rgbaf_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_rgbaf_float,
-      NULL, store_scanline_rgbaf_float },
-
-    { PIXMAN_rgb_float,
-      NULL, fetch_scanline_rgbf_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float,
-      NULL, store_scanline_rgbf_float },
-#endif
-
-    { PIXMAN_a2r10g10b10,
-      NULL, fetch_scanline_a2r10g10b10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
-      NULL, store_scanline_a2r10g10b10_float },
-
-    { PIXMAN_x2r10g10b10,
-      NULL, fetch_scanline_x2r10g10b10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
-      NULL, store_scanline_x2r10g10b10_float },
-
-    { PIXMAN_a2b10g10r10,
-      NULL, fetch_scanline_a2b10g10r10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
-      NULL, store_scanline_a2b10g10r10_float },
-
-    { PIXMAN_x2b10g10r10,
-      NULL, fetch_scanline_x2b10g10r10_float,
-      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
-      NULL, store_scanline_x2b10g10r10_float },
-
-/* YUV formats */
-    { PIXMAN_yuy2,
-      fetch_scanline_yuy2, fetch_scanline_generic_float,
-      fetch_pixel_yuy2, fetch_pixel_generic_float,
-      NULL, NULL },
-
-    { PIXMAN_yv12,
-      fetch_scanline_yv12, fetch_scanline_generic_float,
-      fetch_pixel_yv12, fetch_pixel_generic_float,
-      NULL, NULL },
-    
-    { PIXMAN_null },
-};
-
-static void
-setup_accessors (bits_image_t *image)
-{
-    const format_info_t *info = accessors;
-    
-    while (info->format != PIXMAN_null)
-    {
-	if (info->format == image->format)
-	{
-	    image->fetch_scanline_32 = info->fetch_scanline_32;
-	    image->fetch_scanline_float = info->fetch_scanline_float;
-	    image->fetch_pixel_32 = info->fetch_pixel_32;
-	    image->fetch_pixel_float = info->fetch_pixel_float;
-	    image->store_scanline_32 = info->store_scanline_32;
-	    image->store_scanline_float = info->store_scanline_float;
-	    
-	    return;
-	}
-	
-	info++;
-    }
-}
-
-#ifndef PIXMAN_FB_ACCESSORS
-void
-_pixman_bits_image_setup_accessors_accessors (bits_image_t *image);
-
-void
-_pixman_bits_image_setup_accessors (bits_image_t *image)
-{
-    if (image->read_func || image->write_func)
-	_pixman_bits_image_setup_accessors_accessors (image);
-    else
-	setup_accessors (image);
-}
-
-#else
-
-void
-_pixman_bits_image_setup_accessors_accessors (bits_image_t *image)
-{
-    setup_accessors (image);
-}
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-accessor.h b/vendor/pixman/pixman/pixman-accessor.h
deleted file mode 100644
index 8e0b03621..000000000
--- a/vendor/pixman/pixman/pixman-accessor.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifdef PIXMAN_FB_ACCESSORS
-
-#define READ(img, ptr)							\
-    (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr))))
-#define WRITE(img, ptr,val)						\
-    (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr))))
-
-#define MEMSET_WRAPPED(img, dst, val, size)				\
-    do {								\
-	size_t _i;							\
-	uint8_t *_dst = (uint8_t*)(dst);				\
-	for(_i = 0; _i < (size_t) size; _i++) {				\
-	    WRITE((img), _dst +_i, (val));				\
-	}								\
-    } while (0)
-
-#else
-
-#define READ(img, ptr)		(*(ptr))
-#define WRITE(img, ptr, val)	(*(ptr) = (val))
-#define MEMSET_WRAPPED(img, dst, val, size)				\
-    memset(dst, val, size)
-
-#endif
-
diff --git a/vendor/pixman/pixman/pixman-arm-asm.h b/vendor/pixman/pixman/pixman-arm-asm.h
deleted file mode 100644
index ee7854108..000000000
--- a/vendor/pixman/pixman/pixman-arm-asm.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright © 2008 Mozilla Corporation
- * Copyright © 2010 Nokia Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-	.func fname
-	.global fname
-#ifdef __ELF__
-	.hidden fname
-	.type fname, %function
-#endif
-fname:
-.endm
diff --git a/vendor/pixman/pixman/pixman-arm-common.h b/vendor/pixman/pixman/pixman-arm-common.h
deleted file mode 100644
index 953768830..000000000
--- a/vendor/pixman/pixman/pixman-arm-common.h
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright © 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-#ifndef PIXMAN_ARM_COMMON_H
-#define PIXMAN_ARM_COMMON_H
-
-#include "pixman-inlines.h"
-
-/* Define some macros which can expand into proxy functions between
- * ARM assembly optimized functions and the rest of pixman fast path API.
- *
- * All the low level ARM assembly functions have to use ARM EABI
- * calling convention and take up to 8 arguments:
- *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
- *
- * The arguments are ordered with the most important coming first (the
- * first 4 arguments are passed to function in registers, the rest are
- * on stack). The last arguments are optional, for example if the
- * function is not using mask, then 'mask' and 'mask_stride' can be
- * omitted when doing a function call.
- *
- * Arguments 'src' and 'mask' contain either a pointer to the top left
- * pixel of the composited rectangle or a pixel color value depending
- * on the function type. In the case of just a color value (solid source
- * or mask), the corresponding stride argument is unused.
- */
-
-#define SKIP_ZERO_SRC  1
-#define SKIP_ZERO_MASK 2
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
-                                          src_type, src_cnt,            \
-                                          dst_type, dst_cnt)            \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
-                                         int32_t   h,                   \
-                                         dst_type *dst,                 \
-                                         int32_t   dst_stride,          \
-                                         src_type *src,                 \
-                                         int32_t   src_stride);         \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_composite_info_t *info)              \
-{                                                                       \
-    PIXMAN_COMPOSITE_ARGS (info);                                       \
-    dst_type *dst_line;							\
-    src_type *src_line;                                                 \
-    int32_t dst_stride, src_stride;                                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,        \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride);     \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name,           \
-                                        dst_type, dst_cnt)              \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         uint32_t   src);               \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-			    pixman_composite_info_t *info)              \
-{                                                                       \
-    PIXMAN_COMPOSITE_ARGS (info);					\
-    dst_type  *dst_line;                                                \
-    int32_t    dst_stride;                                              \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (					\
-	imp, src_image, dest_image->bits.format);			\
-                                                                        \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,        \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src);                      \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name,      \
-                                             mask_type, mask_cnt,       \
-                                             dst_type, dst_cnt)         \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         uint32_t   src,                \
-                                         int32_t    unused,             \
-                                         mask_type *mask,               \
-                                         int32_t    mask_stride);       \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_composite_info_t *info)              \
-{                                                                       \
-    PIXMAN_COMPOSITE_ARGS (info);                                       \
-    dst_type  *dst_line;						\
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, mask_stride;                                 \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (					\
-	imp, src_image, dest_image->bits.format);			\
-                                                                        \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,        \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src, 0,                    \
-                                             mask_line, mask_stride);   \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name,       \
-                                            src_type, src_cnt,          \
-                                            dst_type, dst_cnt)          \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         src_type  *src,                \
-                                         int32_t    src_stride,         \
-                                         uint32_t   mask);              \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_composite_info_t *info)              \
-{                                                                       \
-    PIXMAN_COMPOSITE_ARGS (info);                                       \
-    dst_type  *dst_line;						\
-    src_type  *src_line;                                                \
-    int32_t    dst_stride, src_stride;                                  \
-    uint32_t   mask;                                                    \
-                                                                        \
-    mask = _pixman_image_get_solid (					\
-	imp, mask_image, dest_image->bits.format);			\
-                                                                        \
-    if ((flags & SKIP_ZERO_MASK) && mask == 0)                          \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,        \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride,      \
-                                             mask);                     \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name,           \
-                                               src_type, src_cnt,       \
-                                               mask_type, mask_cnt,     \
-                                               dst_type, dst_cnt)       \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         src_type  *src,                \
-                                         int32_t    src_stride,         \
-                                         mask_type *mask,               \
-                                         int32_t    mask_stride);       \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_composite_info_t *info)              \
-{                                                                       \
-    PIXMAN_COMPOSITE_ARGS (info);                                       \
-    dst_type  *dst_line;						\
-    src_type  *src_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, src_stride, mask_stride;                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,        \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride,      \
-                                             mask_line, mask_stride);   \
-}
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op,             \
-                                               src_type, dst_type)            \
-void                                                                          \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
-                                                   int32_t          w,        \
-                                                   dst_type *       dst,      \
-                                                   const src_type * src,      \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx);  \
-                                                                              \
-static force_inline void                                                      \
-scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
-                                                   const src_type * ps,       \
-                                                   int32_t          w,        \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx,   \
-                                                   pixman_bool_t    zero_src) \
-{                                                                             \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
-                                                                  vx, unit_x, \
-                                                                  max_vx);    \
-}                                                                             \
-                                                                              \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op,                         \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, COVER)                             \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op,                          \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, NONE)                              \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op,                           \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, PAD)                               \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op,                        \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, NORMAL)
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op,   \
-                                                  src_type, dst_type)         \
-void                                                                          \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
-                                                   int32_t          w,        \
-                                                   dst_type *       dst,      \
-                                                   const src_type * src,      \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx,   \
-                                                   const uint8_t *  mask);    \
-                                                                              \
-static force_inline void                                                      \
-scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t *  mask,     \
-                                                   dst_type *       pd,       \
-                                                   const src_type * ps,       \
-                                                   int32_t          w,        \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx,   \
-                                                   pixman_bool_t    zero_src) \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-	return;                                                               \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
-                                                                  vx, unit_x, \
-                                                                  max_vx,     \
-                                                                  mask);      \
-}                                                                             \
-                                                                              \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                  \
-                              scaled_nearest_scanline_##cputype##_##name##_##op,\
-                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op,                   \
-                              scaled_nearest_scanline_##cputype##_##name##_##op,\
-                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                    \
-                              scaled_nearest_scanline_##cputype##_##name##_##op,\
-                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)  \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                 \
-                              scaled_nearest_scanline_##cputype##_##name##_##op,\
-                              src_type, uint8_t, dst_type, NORMAL, TRUE, FALSE)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)              \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func),                           \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
-
-/*****************************************************************************/
-
-#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op,     \
-                                                src_type, dst_type)           \
-void                                                                          \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
-                                                dst_type *       dst,         \
-                                                const src_type * top,         \
-                                                const src_type * bottom,      \
-                                                int              wt,          \
-                                                int              wb,          \
-                                                pixman_fixed_t   x,           \
-                                                pixman_fixed_t   ux,          \
-                                                int              width);      \
-                                                                              \
-static force_inline void                                                      \
-scaled_bilinear_scanline_##cputype##_##name##_##op (                          \
-                                                dst_type *       dst,         \
-                                                const uint32_t * mask,        \
-                                                const src_type * src_top,     \
-                                                const src_type * src_bottom,  \
-                                                int32_t          w,           \
-                                                int              wt,          \
-                                                int              wb,          \
-                                                pixman_fixed_t   vx,          \
-                                                pixman_fixed_t   unit_x,      \
-                                                pixman_fixed_t   max_vx,      \
-                                                pixman_bool_t    zero_src)    \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-	return;                                                               \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
-                            dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-}                                                                             \
-                                                                              \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)        \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)         \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)          \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint32_t, dst_type, NORMAL,                  \
-                       FLAG_NONE)
-
-
-#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op,  \
-                                                src_type, dst_type)           \
-void                                                                          \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
-                                                dst_type *       dst,         \
-                                                const uint8_t *  mask,        \
-                                                const src_type * top,         \
-                                                const src_type * bottom,      \
-                                                int              wt,          \
-                                                int              wb,          \
-                                                pixman_fixed_t   x,           \
-                                                pixman_fixed_t   ux,          \
-                                                int              width);      \
-                                                                              \
-static force_inline void                                                      \
-scaled_bilinear_scanline_##cputype##_##name##_##op (                          \
-                                                dst_type *       dst,         \
-                                                const uint8_t *  mask,        \
-                                                const src_type * src_top,     \
-                                                const src_type * src_bottom,  \
-                                                int32_t          w,           \
-                                                int              wt,          \
-                                                int              wb,          \
-                                                pixman_fixed_t   vx,          \
-                                                pixman_fixed_t   unit_x,      \
-                                                pixman_fixed_t   max_vx,      \
-                                                pixman_bool_t    zero_src)    \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-	return;                                                                   \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
-                      dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-}                                                                             \
-                                                                              \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint8_t, dst_type, COVER,                    \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint8_t, dst_type, NONE,                     \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint8_t, dst_type, PAD,                      \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                \
-                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
-                       src_type, uint8_t, dst_type, NORMAL,                   \
-                       FLAG_HAVE_NON_SOLID_MASK)
-
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-arm-detect-win32.asm b/vendor/pixman/pixman/pixman-arm-detect-win32.asm
deleted file mode 100644
index 8f5d5eb2a..000000000
--- a/vendor/pixman/pixman/pixman-arm-detect-win32.asm
+++ /dev/null
@@ -1,21 +0,0 @@
-    area pixman_msvc, code, readonly
-
-    export  pixman_msvc_try_arm_simd_op
-
-pixman_msvc_try_arm_simd_op
-    ;; I don't think the msvc arm asm knows how to do SIMD insns
-    ;; uqadd8 r3,r3,r3
-    dcd 0xe6633f93
-    mov pc,lr
-    endp
-
-    export  pixman_msvc_try_arm_neon_op
-
-pixman_msvc_try_arm_neon_op
-    ;; I don't think the msvc arm asm knows how to do NEON insns
-    ;; veor d0,d0,d0
-    dcd 0xf3000110
-    mov pc,lr
-    endp
-
-    end
diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S
deleted file mode 100644
index 0fd92d61c..000000000
--- a/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S
+++ /dev/null
@@ -1,1358 +0,0 @@
-/*
- * Copyright © 2011 SCore Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- * Author:  Taekyun Kim (tkq.kim@samsung.com)
- */
-
-/*
- * This file contains scaled bilinear scanline functions implemented
- * using older siarhei's bilinear macro template.
- *
- * << General scanline function procedures >>
- *  1. bilinear interpolate source pixels
- *  2. load mask pixels
- *  3. load destination pixels
- *  4. duplicate mask to fill whole register
- *  5. interleave source & destination pixels
- *  6. apply mask to source pixels
- *  7. combine source & destination pixels
- *  8, Deinterleave final result
- *  9. store destination pixels
- *
- * All registers with single number (i.e. src0, tmp0) are 64-bits registers.
- * Registers with double numbers(src01, dst01) are 128-bits registers.
- * All temp registers can be used freely outside the code block.
- * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks.
- *
- * Remarks
- *  There can be lots of pipeline stalls inside code block and between code blocks.
- *  Further optimizations will be done by new macro templates using head/tail_head/tail scheme.
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined (__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-.text
-.fpu neon
-.arch armv7a
-.object_arch armv4
-.eabi_attribute 10, 0
-.eabi_attribute 12, 0
-.arm
-.altmacro
-.p2align 2
-
-#include "pixman-private.h"
-#include "pixman-arm-asm.h"
-#include "pixman-arm-neon-asm.h"
-
-/*
- * Bilinear macros from pixman-arm-neon-asm.S
- */
-
-/*
- * Bilinear scaling support code which tries to provide pixel fetching, color
- * format conversion, and interpolation as separate macros which can be used
- * as the basic building blocks for constructing bilinear scanline functions.
- */
-
-.macro bilinear_load_8888 reg1, reg2, tmp
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    vld1.32   {reg1}, [TMP1], STRIDE
-    vld1.32   {reg2}, [TMP1]
-.endm
-
-.macro bilinear_load_0565 reg1, reg2, tmp
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    vld1.32   {reg2[0]}, [TMP1], STRIDE
-    vld1.32   {reg2[1]}, [TMP1]
-    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_8888 \
-                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
-
-    bilinear_load_8888 reg1, reg2, tmp1
-    vmull.u8  acc1, reg1, d28
-    vmlal.u8  acc1, reg2, d29
-    bilinear_load_8888 reg3, reg4, tmp2
-    vmull.u8  acc2, reg3, d28
-    vmlal.u8  acc2, reg4, d29
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_0565 \
-                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {acc2lo[0]}, [TMP1], STRIDE
-    vld1.32   {acc2hi[0]}, [TMP2], STRIDE
-    vld1.32   {acc2lo[1]}, [TMP1]
-    vld1.32   {acc2hi[1]}, [TMP2]
-    convert_0565_to_x888 acc2, reg3, reg2, reg1
-    vzip.u8   reg1, reg3
-    vzip.u8   reg2, reg4
-    vzip.u8   reg3, reg4
-    vzip.u8   reg1, reg2
-    vmull.u8  acc1, reg1, d28
-    vmlal.u8  acc1, reg2, d29
-    vmull.u8  acc2, reg3, d28
-    vmlal.u8  acc2, reg4, d29
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_0565 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {xacc2lo[0]}, [TMP1], STRIDE
-    vld1.32   {xacc2hi[0]}, [TMP2], STRIDE
-    vld1.32   {xacc2lo[1]}, [TMP1]
-    vld1.32   {xacc2hi[1]}, [TMP2]
-    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {yacc2lo[0]}, [TMP1], STRIDE
-    vzip.u8   xreg1, xreg3
-    vld1.32   {yacc2hi[0]}, [TMP2], STRIDE
-    vzip.u8   xreg2, xreg4
-    vld1.32   {yacc2lo[1]}, [TMP1]
-    vzip.u8   xreg3, xreg4
-    vld1.32   {yacc2hi[1]}, [TMP2]
-    vzip.u8   xreg1, xreg2
-    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
-    vmull.u8  xacc1, xreg1, d28
-    vzip.u8   yreg1, yreg3
-    vmlal.u8  xacc1, xreg2, d29
-    vzip.u8   yreg2, yreg4
-    vmull.u8  xacc2, xreg3, d28
-    vzip.u8   yreg3, yreg4
-    vmlal.u8  xacc2, xreg4, d29
-    vzip.u8   yreg1, yreg2
-    vmull.u8  yacc1, yreg1, d28
-    vmlal.u8  yacc1, yreg2, d29
-    vmull.u8  yacc2, yreg3, d28
-    vmlal.u8  yacc2, yreg4, d29
-.endm
-
-.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
-    vst1.32   {d0, d1}, [OUT]!
-.elseif numpix == 2
-    vst1.32   {d0}, [OUT]!
-.elseif numpix == 1
-    vst1.32   {d0[0]}, [OUT, :32]!
-.else
-    .error bilinear_store_8888 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_store_0565 numpix, tmp1, tmp2
-    vuzp.u8 d0, d1
-    vuzp.u8 d2, d3
-    vuzp.u8 d1, d3
-    vuzp.u8 d0, d2
-    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
-.if numpix == 4
-    vst1.16   {d2}, [OUT]!
-.elseif numpix == 2
-    vst1.32   {d2[0]}, [OUT]!
-.elseif numpix == 1
-    vst1.16   {d2[0]}, [OUT]!
-.else
-    .error bilinear_store_0565 numpix is unsupported
-.endif
-.endm
-
-
-/*
- * Macros for loading mask pixels into register 'mask'.
- * vdup must be done in somewhere else.
- */
-.macro bilinear_load_mask_x numpix, mask
-.endm
-
-.macro bilinear_load_mask_8 numpix, mask
-.if numpix == 4
-    vld1.32     {mask[0]}, [MASK]!
-.elseif numpix == 2
-    vld1.16     {mask[0]}, [MASK]!
-.elseif numpix == 1
-    vld1.8      {mask[0]}, [MASK]!
-.else
-    .error bilinear_load_mask_8 numpix is unsupported
-.endif
-    pld         [MASK, #prefetch_offset]
-.endm
-
-.macro bilinear_load_mask mask_fmt, numpix, mask
-    bilinear_load_mask_&mask_fmt numpix, mask
-.endm
-
-
-/*
- * Macros for loading destination pixels into register 'dst0' and 'dst1'.
- * Interleave should be done somewhere else.
- */
-.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.if numpix == 4
-    vld1.32     {dst0, dst1}, [OUT]
-.elseif numpix == 2
-    vld1.32     {dst0}, [OUT]
-.elseif numpix == 1
-    vld1.32     {dst0[0]}, [OUT]
-.else
-    .error bilinear_load_dst_8888 numpix is unsupported
-.endif
-    pld         [OUT, #(prefetch_offset * 4)]
-.endm
-
-.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
-    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
-    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
-    bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
-.endm
-
-/*
- * Macros for duplicating partially loaded mask to fill entire register.
- * We will apply mask to interleaved source pixels, that is
- *  (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3)
- *  (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3)
- * So, we need to duplicate loaded mask into whole register.
- *
- * For two pixel case
- *  (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
- *  (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
- * We can do some optimizations for this including last pixel cases.
- */
-.macro bilinear_duplicate_mask_x numpix, mask
-.endm
-
-.macro bilinear_duplicate_mask_8 numpix, mask
-.if numpix == 4
-    vdup.32     mask, mask[0]
-.elseif numpix == 2
-    vdup.16     mask, mask[0]
-.elseif numpix == 1
-    vdup.8      mask, mask[0]
-.else
-    .error bilinear_duplicate_mask_8 is unsupported
-.endif
-.endm
-
-.macro bilinear_duplicate_mask mask_fmt, numpix, mask
-    bilinear_duplicate_mask_&mask_fmt numpix, mask
-.endm
-
-/*
- * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form.
- * Interleave should be done when maks is enabled or operator is 'over'.
- */
-.macro bilinear_interleave src0, src1, dst0, dst1
-    vuzp.8      src0, src1
-    vuzp.8      dst0, dst1
-    vuzp.8      src0, src1
-    vuzp.8      dst0, dst1
-.endm
-
-.macro bilinear_interleave_src_dst_x_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_x_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, dst0, dst1
-.endm
-
-.macro bilinear_interleave_src_dst_x_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_8_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, dst0, dst1
-.endm
-
-.macro bilinear_interleave_src_dst_8_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, dst0, dst1
-.endm
-
-.macro bilinear_interleave_src_dst_8_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, dst0, dst1
-.endm
-
-.macro bilinear_interleave_src_dst \
-                mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave_src_dst_&mask_fmt&_&op \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-.endm
-
-
-/*
- * Macros for applying masks to src pixels. (see combine_mask_u() function)
- * src, dst should be in interleaved form.
- * mask register should be in form (m0, m1, m2, m3).
- */
-.macro bilinear_apply_mask_to_src_x \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-.endm
-
-.macro bilinear_apply_mask_to_src_8 \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-
-    vmull.u8        tmp01, src0, mask
-    vmull.u8        tmp23, src1, mask
-    /* bubbles */
-    vrshr.u16       tmp45, tmp01, #8
-    vrshr.u16       tmp67, tmp23, #8
-    /* bubbles */
-    vraddhn.u16     src0, tmp45, tmp01
-    vraddhn.u16     src1, tmp67, tmp23
-.endm
-
-.macro bilinear_apply_mask_to_src \
-                mask_fmt, numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-
-    bilinear_apply_mask_to_src_&mask_fmt \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-.endm
-
-
-/*
- * Macros for combining src and destination pixels.
- * Interleave or not is depending on operator 'op'.
- */
-.macro bilinear_combine_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-.endm
-
-.macro bilinear_combine_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    vdup.32     tmp8, src1[1]
-    /* bubbles */
-    vmvn.8      tmp8, tmp8
-    /* bubbles */
-    vmull.u8    tmp01, dst0, tmp8
-    /* bubbles */
-    vmull.u8    tmp23, dst1, tmp8
-    /* bubbles */
-    vrshr.u16   tmp45, tmp01, #8
-    vrshr.u16   tmp67, tmp23, #8
-    /* bubbles */
-    vraddhn.u16 dst0, tmp45, tmp01
-    vraddhn.u16 dst1, tmp67, tmp23
-    /* bubbles */
-    vqadd.u8    src01, dst01, src01
-.endm
-
-.macro bilinear_combine_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    vqadd.u8    src01, dst01, src01
-.endm
-
-.macro bilinear_combine \
-                op, numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    bilinear_combine_&op \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-.endm
-
-/*
- * Macros for final deinterleaving of destination pixels if needed.
- */
-.macro bilinear_deinterleave numpix, dst0, dst1, dst01
-    vuzp.8      dst0, dst1
-    /* bubbles */
-    vuzp.8      dst0, dst1
-.endm
-
-.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
-    bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
-.endm
-
-
-.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_&src_fmt d0, d1, d2
-    bilinear_load_mask mask_fmt, 1, d4
-    bilinear_load_dst dst_fmt, op, 1, d18, d19, q9
-    vmull.u8  q1, d0, d28
-    vmlal.u8  q1, d1, d29
-    /* 5 cycles bubble */
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    /* 5 cycles bubble */
-    bilinear_duplicate_mask mask_fmt, 1, d4
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    /* 3 cycles bubble */
-    vmovn.u16 d0, q0
-    /* 1 cycle bubble */
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 1, d0, d1, q0, d18, d19, q9
-    bilinear_apply_mask_to_src \
-                mask_fmt, 1, d0, d1, q0, d4, \
-                q3, q8, q10, q11
-    bilinear_combine \
-                op, 1, d0, d1, q0, d18, d19, q9, \
-                q3, q8, q10, q11, d5
-    bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0
-    bilinear_store_&dst_fmt 1, q2, q3
-.endm
-
-.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_and_vertical_interpolate_two_&src_fmt \
-                q1, q11, d0, d1, d20, d21, d22, d23
-    bilinear_load_mask mask_fmt, 2, d4
-    bilinear_load_dst dst_fmt, op, 2, d18, d19, q9
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q10, d22, d31
-    vmlal.u16 q10, d23, d31
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    bilinear_duplicate_mask mask_fmt, 2, d4
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-    vmovn.u16 d0, q0
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 2, d0, d1, q0, d18, d19, q9
-    bilinear_apply_mask_to_src \
-                mask_fmt, 2, d0, d1, q0, d4, \
-                q3, q8, q10, q11
-    bilinear_combine \
-                op, 2, d0, d1, q0, d18, d19, q9, \
-                q3, q8, q10, q11, d5
-    bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0
-    bilinear_store_&dst_fmt 2, q2, q3
-.endm
-
-.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_and_vertical_interpolate_four_&src_fmt \
-                q1, q11, d0, d1, d20, d21, d22, d23 \
-                q3, q9,  d4, d5, d16, d17, d18, d19
-    pld       [TMP1, PF_OFFS]
-    sub       TMP1, TMP1, STRIDE
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q10, d22, d31
-    vmlal.u16 q10, d23, d31
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q2, d6, d30
-    vmlal.u16 q2, d7, d30
-    vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS
-    bilinear_load_mask mask_fmt, 4, d22
-    bilinear_load_dst dst_fmt, op, 4, d2, d3, q1
-    pld       [TMP1, PF_OFFS]
-    vmlsl.u16 q8, d18, d31
-    vmlal.u16 q8, d19, d31
-    vadd.u16  q12, q12, q13
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS)
-    bilinear_duplicate_mask mask_fmt, 4, d22
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16 d0, q0
-    vmovn.u16 d1, q2
-    vadd.u16  q12, q12, q13
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 4, d0, d1, q0, d2, d3, q1
-    bilinear_apply_mask_to_src \
-                mask_fmt, 4, d0, d1, q0, d22, \
-                q3, q8, q9, q10
-    bilinear_combine \
-                op, 4, d0, d1, q0, d2, d3, q1, \
-                q3, q8, q9, q10, d23
-    bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0
-    bilinear_store_&dst_fmt 4, q2, q3
-.endm
-
-.set BILINEAR_FLAG_USE_MASK,		1
-.set BILINEAR_FLAG_USE_ALL_NEON_REGS,	2
-
-/*
- * Main template macro for generating NEON optimized bilinear scanline functions.
- *
- * Bilinear scanline generator macro take folling arguments:
- *  fname			- name of the function to generate
- *  src_fmt			- source color format (8888 or 0565)
- *  dst_fmt			- destination color format (8888 or 0565)
- *  src/dst_bpp_shift		- (1 << bpp_shift) is the size of src/dst pixel in bytes
- *  process_last_pixel		- code block that interpolate one pixel and does not
- *				  update horizontal weight
- *  process_two_pixels		- code block that interpolate two pixels and update
- *				  horizontal weight
- *  process_four_pixels		- code block that interpolate four pixels and update
- *				  horizontal weight
- *  process_pixblock_head	- head part of middle loop
- *  process_pixblock_tail	- tail part of middle loop
- *  process_pixblock_tail_head	- tail_head of middle loop
- *  pixblock_size		- number of pixels processed in a single middle loop
- *  prefetch_distance		- prefetch in the source image by that many pixels ahead
- */
-
-.macro generate_bilinear_scanline_func \
-	fname, \
-	src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
-	bilinear_process_last_pixel, \
-	bilinear_process_two_pixels, \
-	bilinear_process_four_pixels, \
-	bilinear_process_pixblock_head, \
-	bilinear_process_pixblock_tail, \
-	bilinear_process_pixblock_tail_head, \
-	pixblock_size, \
-	prefetch_distance, \
-	flags
-
-pixman_asm_function fname
-.if pixblock_size == 8
-.elseif pixblock_size == 4
-.else
-    .error unsupported pixblock size
-.endif
-
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
-    OUT       .req    r0
-    TOP       .req    r1
-    BOTTOM    .req    r2
-    WT        .req    r3
-    WB        .req    r4
-    X         .req    r5
-    UX        .req    r6
-    WIDTH     .req    ip
-    TMP1      .req    r3
-    TMP2      .req    r4
-    PF_OFFS   .req    r7
-    TMP3      .req    r8
-    TMP4      .req    r9
-    STRIDE    .req    r2
-
-    mov		ip, sp
-    push	{r4, r5, r6, r7, r8, r9}
-    mov		PF_OFFS, #prefetch_distance
-    ldmia	ip, {WB, X, UX, WIDTH}
-.else
-    OUT       .req      r0
-    MASK      .req      r1
-    TOP       .req      r2
-    BOTTOM    .req      r3
-    WT        .req      r4
-    WB        .req      r5
-    X         .req      r6
-    UX        .req      r7
-    WIDTH     .req      ip
-    TMP1      .req      r4
-    TMP2      .req      r5
-    PF_OFFS   .req      r8
-    TMP3      .req      r9
-    TMP4      .req      r10
-    STRIDE    .req      r3
-
-    .set prefetch_offset, prefetch_distance
-
-    mov       ip, sp
-    push      {r4, r5, r6, r7, r8, r9, r10, ip}
-    mov       PF_OFFS, #prefetch_distance
-    ldmia     ip, {WT, WB, X, UX, WIDTH}
-.endif
-
-    mul       PF_OFFS, PF_OFFS, UX
-
-.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-    vpush     {d8-d15}
-.endif
-
-    sub	      STRIDE, BOTTOM, TOP
-    .unreq    BOTTOM
-
-    cmp       WIDTH, #0
-    ble       3f
-
-    vdup.u16  q12, X
-    vdup.u16  q13, UX
-    vdup.u8   d28, WT
-    vdup.u8   d29, WB
-    vadd.u16  d25, d25, d26
-
-    /* ensure good destination alignment  */
-    cmp       WIDTH, #1
-    blt       0f
-    tst       OUT, #(1 << dst_bpp_shift)
-    beq       0f
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-    bilinear_process_last_pixel
-    sub       WIDTH, WIDTH, #1
-0:
-    vadd.u16  q13, q13, q13
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-
-    cmp       WIDTH, #2
-    blt       0f
-    tst       OUT, #(1 << (dst_bpp_shift + 1))
-    beq       0f
-    bilinear_process_two_pixels
-    sub       WIDTH, WIDTH, #2
-0:
-.if pixblock_size == 8
-    cmp       WIDTH, #4
-    blt       0f
-    tst       OUT, #(1 << (dst_bpp_shift + 2))
-    beq       0f
-    bilinear_process_four_pixels
-    sub       WIDTH, WIDTH, #4
-0:
-.endif
-    subs      WIDTH, WIDTH, #pixblock_size
-    blt       1f
-    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
-    bilinear_process_pixblock_head
-    subs      WIDTH, WIDTH, #pixblock_size
-    blt       5f
-0:
-    bilinear_process_pixblock_tail_head
-    subs      WIDTH, WIDTH, #pixblock_size
-    bge       0b
-5:
-    bilinear_process_pixblock_tail
-1:
-.if pixblock_size == 8
-    tst       WIDTH, #4
-    beq       2f
-    bilinear_process_four_pixels
-2:
-.endif
-    /* handle the remaining trailing pixels */
-    tst       WIDTH, #2
-    beq       2f
-    bilinear_process_two_pixels
-2:
-    tst       WIDTH, #1
-    beq       3f
-    bilinear_process_last_pixel
-3:
-.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-    vpop      {d8-d15}
-.endif
-
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
-    pop       {r4, r5, r6, r7, r8, r9}
-.else
-    pop       {r4, r5, r6, r7, r8, r9, r10, ip}
-.endif
-    bx        lr
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    TMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
-    .unreq    MASK
-.endif
-
-.endfunc
-
-.endm
-
-/* src_8888_8_8888 */
-.macro bilinear_src_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_head
-    bilinear_src_8888_8_8888_process_four_pixels
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_tail
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_tail_head
-    bilinear_src_8888_8_8888_process_pixblock_tail
-    bilinear_src_8888_8_8888_process_pixblock_head
-.endm
-
-/* src_8888_8_0565 */
-.macro bilinear_src_8888_8_0565_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_head
-    bilinear_src_8888_8_0565_process_four_pixels
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_tail
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_tail_head
-    bilinear_src_8888_8_0565_process_pixblock_tail
-    bilinear_src_8888_8_0565_process_pixblock_head
-.endm
-
-/* src_0565_8_x888 */
-.macro bilinear_src_0565_8_x888_process_last_pixel
-    bilinear_interpolate_last_pixel 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_two_pixels
-    bilinear_interpolate_two_pixels 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_four_pixels
-    bilinear_interpolate_four_pixels 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_head
-    bilinear_src_0565_8_x888_process_four_pixels
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_tail
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_tail_head
-    bilinear_src_0565_8_x888_process_pixblock_tail
-    bilinear_src_0565_8_x888_process_pixblock_head
-.endm
-
-/* src_0565_8_0565 */
-.macro bilinear_src_0565_8_0565_process_last_pixel
-    bilinear_interpolate_last_pixel 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_two_pixels
-    bilinear_interpolate_two_pixels 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_four_pixels
-    bilinear_interpolate_four_pixels 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_head
-    bilinear_src_0565_8_0565_process_four_pixels
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_tail
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_tail_head
-    bilinear_src_0565_8_0565_process_pixblock_tail
-    bilinear_src_0565_8_0565_process_pixblock_head
-.endm
-
-/* over_8888_8888 */
-.macro bilinear_over_8888_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_pixblock_head
-    mov         TMP1, X, asr #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, asl #2
-    mov         TMP2, X, asr #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, asl #2
-
-    vld1.32     {d22}, [TMP1], STRIDE
-    vld1.32     {d23}, [TMP1]
-    mov         TMP3, X, asr #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, asl #2
-    vmull.u8    q8, d22, d28
-    vmlal.u8    q8, d23, d29
-
-    vld1.32     {d22}, [TMP2], STRIDE
-    vld1.32     {d23}, [TMP2]
-    mov         TMP4, X, asr #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, asl #2
-    vmull.u8    q9, d22, d28
-    vmlal.u8    q9, d23, d29
-
-    vld1.32     {d22}, [TMP3], STRIDE
-    vld1.32     {d23}, [TMP3]
-    vmull.u8    q10, d22, d28
-    vmlal.u8    q10, d23, d29
-
-    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q0, d16, d30
-    vmlal.u16   q0, d17, d30
-
-    pld         [TMP4, PF_OFFS]
-    vld1.32     {d16}, [TMP4], STRIDE
-    vld1.32     {d17}, [TMP4]
-    pld         [TMP4, PF_OFFS]
-    vmull.u8    q11, d16, d28
-    vmlal.u8    q11, d17, d29
-
-    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q1, d18, d31
-    vmlal.u16   q1, d19, d31
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16    q12, q12, q13
-.endm
-
-.macro bilinear_over_8888_8888_process_pixblock_tail
-    vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q2, d20, d30
-    vmlal.u16   q2, d21, d30
-    vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q3, d22, d31
-    vmlal.u16   q3, d23, d31
-    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32     {d2, d3}, [OUT, :128]
-    pld         [OUT, #(prefetch_offset * 4)]
-    vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16   d6, q0
-    vmovn.u16   d7, q2
-    vuzp.8      d6, d7
-    vuzp.8      d2, d3
-    vuzp.8      d6, d7
-    vuzp.8      d2, d3
-    vdup.32     d4, d7[1]
-    vmvn.8      d4, d4
-    vmull.u8    q11, d2, d4
-    vmull.u8    q2, d3, d4
-    vrshr.u16   q1, q11, #8
-    vrshr.u16   q10, q2, #8
-    vraddhn.u16 d2, q1, q11
-    vraddhn.u16 d3, q10, q2
-    vqadd.u8    q3, q1, q3
-    vuzp.8      d6, d7
-    vuzp.8      d6, d7
-    vadd.u16    q12, q12, q13
-    vst1.32     {d6, d7}, [OUT, :128]!
-.endm
-
-/*
- * Software-pipelined main-loop body for the unmasked OVER bilinear scaler.
- * Two instruction streams are interleaved and told apart by indentation:
- *  - left column: for each of four pixels compute the fetch address
- *    TMPn = TOP + (X >> 16) * 4 and advance X by UX, load 64 bits at TMPn
- *    and 64 bits STRIDE bytes further, weight the two loads with d28/d29
- *    (vmull/vmlal) and start the second weighting pass with d30/d31;
- *  - far-right column: finish the pixels started earlier, load four
- *    destination pixels from OUT, scale them by the inverted bytes
- *    broadcast from d7[1] (presumably the source alpha), add the source
- *    with saturation and store the result back through OUT.
- * q12 is advanced by q13 twice per expansion and q15 is re-derived from it
- * each time with a right shift by (16 - BILINEAR_INTERPOLATION_BITS).
- * The two 'pld [TMP4, PF_OFFS]' are issued before and after TMP4 is
- * post-incremented by STRIDE, so they prefetch from both rows.
- * NOTE(review): the two columns are expected to mirror the matching
- * '_head' and '_tail' macros - confirm whenever either of them changes.
- */
-.macro bilinear_over_8888_8888_process_pixblock_tail_head
-                                            vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
-    mov         TMP1, X, asr #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, asl #2
-                                            vmlsl.u16   q2, d20, d30
-    mov         TMP2, X, asr #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, asl #2
-                                            vmlal.u16   q2, d21, d30
-                                            vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vld1.32     {d20}, [TMP1], STRIDE
-                                            vmlsl.u16   q3, d22, d31
-                                            vmlal.u16   q3, d23, d31
-    vld1.32     {d21}, [TMP1]
-    vmull.u8    q8, d20, d28
-    vmlal.u8    q8, d21, d29
-                                            vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vld1.32     {d2, d3}, [OUT, :128]
-                                            pld         [OUT, PF_OFFS]
-                                            vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32     {d22}, [TMP2], STRIDE
-                                            vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vmovn.u16   d6, q0
-    vld1.32     {d23}, [TMP2]
-    vmull.u8    q9, d22, d28
-    mov         TMP3, X, asr #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, asl #2
-    mov         TMP4, X, asr #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, asl #2
-    vmlal.u8    q9, d23, d29
-                                            vmovn.u16   d7, q2
-    vld1.32     {d22}, [TMP3], STRIDE
-                                            vuzp.8      d6, d7
-                                            vuzp.8      d2, d3
-                                            vuzp.8      d6, d7
-                                            vuzp.8      d2, d3
-                                            vdup.32     d4, d7[1]
-    vld1.32     {d23}, [TMP3]
-                                            vmvn.8      d4, d4
-    vmull.u8    q10, d22, d28
-    vmlal.u8    q10, d23, d29
-                                            vmull.u8    q11, d2, d4
-                                            vmull.u8    q2, d3, d4
-    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q0, d16, d30
-                                            vrshr.u16   q1, q11, #8
-    vmlal.u16   q0, d17, d30
-                                            vrshr.u16   q8, q2, #8
-                                            vraddhn.u16 d2, q1, q11
-                                            vraddhn.u16 d3, q8, q2
-    pld         [TMP4, PF_OFFS]
-    vld1.32     {d16}, [TMP4], STRIDE
-                                            vqadd.u8    q3, q1, q3
-    vld1.32     {d17}, [TMP4]
-    pld         [TMP4, PF_OFFS]
-    vmull.u8    q11, d16, d28
-    vmlal.u8    q11, d17, d29
-                                            vuzp.8      d6, d7
-    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
-                                            vuzp.8      d6, d7
-    vmlsl.u16   q1, d18, d31
-                                            vadd.u16    q12, q12, q13
-    vmlal.u16   q1, d19, d31
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16    q12, q12, q13
-                                            vst1.32     {d6, d7}, [OUT, :128]!
-.endm
-
-/* over_8888_8_8888 */
-/*
- * One-pixel step of the masked OVER scaler: forwards to the shared
- * bilinear_interpolate_last_pixel template. The arguments read as source
- * format, mask format, destination format and operator (template is
- * defined elsewhere - confirm there).
- */
-.macro bilinear_over_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, over
-.endm
-
-/*
- * Two-pixel step of the masked OVER scaler: forwards to the shared
- * bilinear_interpolate_two_pixels template with the same 8888 / 8 / 8888 /
- * over arguments as the one-pixel variant.
- */
-.macro bilinear_over_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, over
-.endm
-
-/*
- * Four-pixel step of the masked OVER scaler built from the generic
- * bilinear_interpolate_four_pixels template (8888 / 8 / 8888 / over).
- * The hand-scheduled pixblock macros below process the same block size.
- */
-.macro bilinear_over_8888_8_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 8888, over
-.endm
-
-/*
- * First half of a four-pixel block of the masked OVER scaler.
- *  - Computes four fetch addresses TMPn = TOP + (X >> 16) * 4, advancing X
- *    by UX after each one.
- *  - For pixels 1 and 2 loads 64 bits at TMPn and 64 bits STRIDE bytes
- *    further, weights the pair with d28/d29 and then with d30/d31, and
- *    narrows the result so that d16 ends up holding both pixels.
- *  - For pixels 3 and 4 only the d28/d29 pass is done here; the partial
- *    results stay in q3 and q1 for the '_tail' macro.
- *  - Re-derives q15 from q12, advances q12 by q13, and loads 32 bits of
- *    mask data into d22[0], post-incrementing MASK.
- */
-.macro bilinear_over_8888_8_8888_process_pixblock_head
-    mov         TMP1, X, asr #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, asl #2
-    vld1.32     {d0}, [TMP1], STRIDE
-    mov         TMP2, X, asr #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, asl #2
-    vld1.32     {d1}, [TMP1]
-    mov         TMP3, X, asr #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, asl #2
-    vld1.32     {d2}, [TMP2], STRIDE
-    mov         TMP4, X, asr #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, asl #2
-    vld1.32     {d3}, [TMP2]
-    vmull.u8    q2, d0, d28
-    vmull.u8    q3, d2, d28
-    vmlal.u8    q2, d1, d29
-    vmlal.u8    q3, d3, d29
-    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
-    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q0, d4, d30
-    vmlsl.u16   q1, d6, d31
-    vmlal.u16   q0, d5, d30
-    vmlal.u16   q1, d7, d31
-    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32     {d2}, [TMP3], STRIDE
-    vld1.32     {d3}, [TMP3]
-    pld         [TMP4, PF_OFFS]
-    vld1.32     {d4}, [TMP4], STRIDE
-    vld1.32     {d5}, [TMP4]
-    pld         [TMP4, PF_OFFS]
-    vmull.u8    q3, d2, d28
-    vmlal.u8    q3, d3, d29
-    vmull.u8    q1, d4, d28
-    vmlal.u8    q1, d5, d29
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32     {d22[0]}, [MASK]!
-    pld         [MASK, #prefetch_offset]
-    vadd.u16    q12, q12, q13
-    vmovn.u16   d16, q0
-.endm
-
-/*
- * Second half of a four-pixel block of the masked OVER scaler.
- *  - Finishes the d30/d31 weighting pass for the two pixels the '_head'
- *    macro left in q3 and q1 and narrows them into d17 (d16 already holds
- *    the first two pixels).
- *  - Broadcasts the 32 bits of mask data across d22 and loads four
- *    destination pixels from OUT into {d18, d19}.
- *  - Applies vuzp.8 twice to the source pair and to the destination pair
- *    to regroup the bytes per channel.
- *  - Multiplies the source by the mask (vmull, then vrsra/vrshrn by 8, the
- *    rounding divide-by-255 idiom).
- *  - Broadcasts d17[1] (presumably the masked source alpha), inverts it,
- *    scales the destination by it and adds the source with saturation.
- *  - Undoes the byte regrouping and stores four pixels through OUT with
- *    post-increment. q0 is used as a scratch register here.
- */
-.macro bilinear_over_8888_8_8888_process_pixblock_tail
-    vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
-    vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16   q9, d6, d30
-    vmlsl.u16   q10, d2, d31
-    vmlal.u16   q9, d7, d30
-    vmlal.u16   q10, d3, d31
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16    q12, q12, q13
-    vdup.32     d22, d22[0]
-    vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16   d17, q9
-    vld1.32     {d18, d19}, [OUT, :128]
-    pld         [OUT, PF_OFFS]
-    vuzp.8      d16, d17
-    vuzp.8      d18, d19
-    vuzp.8      d16, d17
-    vuzp.8      d18, d19
-    vmull.u8    q10, d16, d22
-    vmull.u8    q11, d17, d22
-    vrsra.u16   q10, q10, #8
-    vrsra.u16   q11, q11, #8
-    vrshrn.u16  d16, q10, #8
-    vrshrn.u16  d17, q11, #8
-    vdup.32     d22, d17[1]
-    vmvn.8      d22, d22
-    vmull.u8    q10, d18, d22
-    vmull.u8    q11, d19, d22
-    vrshr.u16   q9, q10, #8
-    vrshr.u16   q0, q11, #8
-    vraddhn.u16 d18, q9, q10
-    vraddhn.u16 d19, q0, q11
-    vqadd.u8    q9, q8, q9
-    vuzp.8      d18, d19
-    vuzp.8      d18, d19
-    vst1.32     {d18, d19}, [OUT, :128]!
-.endm
-
-/*
- * Software-pipelined main-loop body of the masked OVER scaler: the '_tail'
- * of the previous four-pixel block interleaved with the '_head' of the
- * next one. The streams are told apart by indentation:
- *  - far-right column: the instruction sequence of the '_tail' macro;
- *  - left column: the instructions of the '_head' macro.
- * Differences from a plain "tail; head" expansion:
- *  - the tail stream uses q15 as its scratch register where '_tail' uses
- *    q0, because q0 already carries data of the next block; q15 is
- *    re-derived from q12 right after its last use as scratch;
- *  - the OUT prefetch offset is spelled #(prefetch_offset * 4) instead of
- *    PF_OFFS;
- *  - the 'pld [MASK, #prefetch_offset]' of '_head' is not repeated.
- * Keep this macro in sync with '_head' and '_tail' when changing either.
- */
-.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
-                                            vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
-    mov         TMP1, X, asr #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, asl #2
-                                            vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
-    vld1.32     {d0}, [TMP1], STRIDE
-    mov         TMP2, X, asr #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, asl #2
-                                            vmlsl.u16   q9, d6, d30
-                                            vmlsl.u16   q10, d2, d31
-    vld1.32     {d1}, [TMP1]
-    mov         TMP3, X, asr #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, asl #2
-                                            vmlal.u16   q9, d7, d30
-                                            vmlal.u16   q10, d3, d31
-    vld1.32     {d2}, [TMP2], STRIDE
-    mov         TMP4, X, asr #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, asl #2
-                                            vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-                                            vadd.u16    q12, q12, q13
-    vld1.32     {d3}, [TMP2]
-                                            vdup.32     d22, d22[0]
-                                            vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vmull.u8    q2, d0, d28
-    vmull.u8    q3, d2, d28
-                                            vmovn.u16   d17, q9
-                                            vld1.32     {d18, d19}, [OUT, :128]
-                                            pld         [OUT, #(prefetch_offset * 4)]
-    vmlal.u8    q2, d1, d29
-    vmlal.u8    q3, d3, d29
-                                            vuzp.8      d16, d17
-                                            vuzp.8      d18, d19
-    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
-    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
-                                            vuzp.8      d16, d17
-                                            vuzp.8      d18, d19
-    vmlsl.u16   q0, d4, d30
-    vmlsl.u16   q1, d6, d31
-                                            vmull.u8    q10, d16, d22
-                                            vmull.u8    q11, d17, d22
-    vmlal.u16   q0, d5, d30
-    vmlal.u16   q1, d7, d31
-                                            vrsra.u16   q10, q10, #8
-                                            vrsra.u16   q11, q11, #8
-    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            vrshrn.u16  d16, q10, #8
-                                            vrshrn.u16  d17, q11, #8
-    vld1.32     {d2}, [TMP3], STRIDE
-                                            vdup.32     d22, d17[1]
-    vld1.32     {d3}, [TMP3]
-                                            vmvn.8      d22, d22
-    pld         [TMP4, PF_OFFS]
-    vld1.32     {d4}, [TMP4], STRIDE
-                                            vmull.u8    q10, d18, d22
-                                            vmull.u8    q11, d19, d22
-    vld1.32     {d5}, [TMP4]
-    pld         [TMP4, PF_OFFS]
-    vmull.u8    q3, d2, d28
-                                            vrshr.u16   q9, q10, #8
-                                            vrshr.u16   q15, q11, #8
-    vmlal.u8    q3, d3, d29
-    vmull.u8    q1, d4, d28
-                                            vraddhn.u16 d18, q9, q10
-                                            vraddhn.u16 d19, q15, q11
-    vmlal.u8    q1, d5, d29
-    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-                                            vqadd.u8    q9, q8, q9
-    vld1.32     {d22[0]}, [MASK]!
-                                            vuzp.8      d18, d19
-    vadd.u16    q12, q12, q13
-                                            vuzp.8      d18, d19
-    vmovn.u16   d16, q0
-                                            vst1.32     {d18, d19}, [OUT, :128]!
-.endm
-
-/* add_8888_8888 */
-/*
- * One-pixel step of the unmasked ADD scaler: forwards to the shared
- * bilinear_interpolate_last_pixel template. The mask argument is 'x' here,
- * where the masked variants pass '8' (presumably meaning "no mask").
- */
-.macro bilinear_add_8888_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, x, 8888, add
-.endm
-
-/*
- * Two-pixel step of the unmasked ADD scaler: forwards to the shared
- * bilinear_interpolate_two_pixels template (8888 / x / 8888 / add).
- */
-.macro bilinear_add_8888_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, x, 8888, add
-.endm
-
-/*
- * Four-pixel step of the unmasked ADD scaler: forwards to the shared
- * bilinear_interpolate_four_pixels template (8888 / x / 8888 / add).
- * Also reused as the pixblock head of this variant.
- */
-.macro bilinear_add_8888_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, x, 8888, add
-.endm
-
-/*
- * Pixblock head of the unmasked ADD scaler. This variant is not
- * hand-scheduled: the head simply expands the generic four-pixel step.
- */
-.macro bilinear_add_8888_8888_process_pixblock_head
-    bilinear_add_8888_8888_process_four_pixels
-.endm
-
-/*
- * Pixblock tail of the unmasked ADD scaler. Empty: the head expands the
- * whole four-pixel step, so presumably nothing is left to finish here.
- */
-.macro bilinear_add_8888_8888_process_pixblock_tail
-.endm
-
-/*
- * Main-loop body of the unmasked ADD scaler: the tail followed by the
- * head, back to back, with no instruction interleaving.
- */
-.macro bilinear_add_8888_8888_process_pixblock_tail_head
-    bilinear_add_8888_8888_process_pixblock_tail
-    bilinear_add_8888_8888_process_pixblock_head
-.endm
-
-/* add_8888_8_8888 */
-/*
- * One-pixel step of the masked ADD scaler: forwards to the shared
- * bilinear_interpolate_last_pixel template (8888 / 8 / 8888 / add).
- */
-.macro bilinear_add_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, add
-.endm
-
-/*
- * Two-pixel step of the masked ADD scaler: forwards to the shared
- * bilinear_interpolate_two_pixels template (8888 / 8 / 8888 / add).
- */
-.macro bilinear_add_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, add
-.endm
-
-/*
- * Four-pixel step of the masked ADD scaler: forwards to the shared
- * bilinear_interpolate_four_pixels template (8888 / 8 / 8888 / add).
- * Also reused as the pixblock head of this variant.
- */
-.macro bilinear_add_8888_8_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 8888, add
-.endm
-
-/*
- * Pixblock head of the masked ADD scaler. This variant is not
- * hand-scheduled: the head simply expands the generic four-pixel step.
- */
-.macro bilinear_add_8888_8_8888_process_pixblock_head
-    bilinear_add_8888_8_8888_process_four_pixels
-.endm
-
-/*
- * Pixblock tail of the masked ADD scaler. Empty: the head expands the
- * whole four-pixel step, so presumably nothing is left to finish here.
- */
-.macro bilinear_add_8888_8_8888_process_pixblock_tail
-.endm
-
-/*
- * Main-loop body of the masked ADD scaler: the tail followed by the head,
- * back to back, with no instruction interleaving.
- */
-.macro bilinear_add_8888_8_8888_process_pixblock_tail_head
-    bilinear_add_8888_8_8888_process_pixblock_tail
-    bilinear_add_8888_8_8888_process_pixblock_head
-.endm
-
-
-/*
- * Bilinear scanline functions.
- *
- * Each invocation of generate_bilinear_scanline_func below emits one
- * exported function. The arguments are, in order:
- *  - the name of the function to emit;
- *  - the source and destination format tags (8888 or 0565);
- *  - two small integers that are 2 for an 8888 format and 1 for a 0565
- *    format (presumably log2 of the bytes per pixel of source and
- *    destination - confirm against the template);
- *  - the six '_process_*' macros implementing the operation: last pixel,
- *    two pixels, four pixels, pixblock head, pixblock tail and pixblock
- *    tail_head;
- *  - '4, 28' (NOTE(review): likely the pixblock size and the prefetch
- *    distance - confirm against the template);
- *  - the flags: BILINEAR_FLAG_USE_MASK for every variant with an 8-bit
- *    mask ('_8_' in the name), 0 for the two unmasked ones.
- */
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_src_8888_8_8888_process_last_pixel, \
-    bilinear_src_8888_8_8888_process_two_pixels, \
-    bilinear_src_8888_8_8888_process_four_pixels, \
-    bilinear_src_8888_8_8888_process_pixblock_head, \
-    bilinear_src_8888_8_8888_process_pixblock_tail, \
-    bilinear_src_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
-    8888, 0565, 2, 1, \
-    bilinear_src_8888_8_0565_process_last_pixel, \
-    bilinear_src_8888_8_0565_process_two_pixels, \
-    bilinear_src_8888_8_0565_process_four_pixels, \
-    bilinear_src_8888_8_0565_process_pixblock_head, \
-    bilinear_src_8888_8_0565_process_pixblock_tail, \
-    bilinear_src_8888_8_0565_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
-    0565, 8888, 1, 2, \
-    bilinear_src_0565_8_x888_process_last_pixel, \
-    bilinear_src_0565_8_x888_process_two_pixels, \
-    bilinear_src_0565_8_x888_process_four_pixels, \
-    bilinear_src_0565_8_x888_process_pixblock_head, \
-    bilinear_src_0565_8_x888_process_pixblock_tail, \
-    bilinear_src_0565_8_x888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
-    0565, 0565, 1, 1, \
-    bilinear_src_0565_8_0565_process_last_pixel, \
-    bilinear_src_0565_8_0565_process_two_pixels, \
-    bilinear_src_0565_8_0565_process_four_pixels, \
-    bilinear_src_0565_8_0565_process_pixblock_head, \
-    bilinear_src_0565_8_0565_process_pixblock_tail, \
-    bilinear_src_0565_8_0565_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_over_8888_8888_process_last_pixel, \
-    bilinear_over_8888_8888_process_two_pixels, \
-    bilinear_over_8888_8888_process_four_pixels, \
-    bilinear_over_8888_8888_process_pixblock_head, \
-    bilinear_over_8888_8888_process_pixblock_tail, \
-    bilinear_over_8888_8888_process_pixblock_tail_head, \
-    4, 28, 0
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_over_8888_8_8888_process_last_pixel, \
-    bilinear_over_8888_8_8888_process_two_pixels, \
-    bilinear_over_8888_8_8888_process_four_pixels, \
-    bilinear_over_8888_8_8888_process_pixblock_head, \
-    bilinear_over_8888_8_8888_process_pixblock_tail, \
-    bilinear_over_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_add_8888_8888_process_last_pixel, \
-    bilinear_add_8888_8888_process_two_pixels, \
-    bilinear_add_8888_8888_process_four_pixels, \
-    bilinear_add_8888_8888_process_pixblock_head, \
-    bilinear_add_8888_8888_process_pixblock_tail, \
-    bilinear_add_8888_8888_process_pixblock_tail_head, \
-    4, 28, 0
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_add_8888_8_8888_process_last_pixel, \
-    bilinear_add_8888_8_8888_process_two_pixels, \
-    bilinear_add_8888_8_8888_process_four_pixels, \
-    bilinear_add_8888_8_8888_process_pixblock_head, \
-    bilinear_add_8888_8_8888_process_pixblock_tail, \
-    bilinear_add_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm.S b/vendor/pixman/pixman/pixman-arm-neon-asm.S
deleted file mode 100644
index 7e949a38f..000000000
--- a/vendor/pixman/pixman/pixman-arm-neon-asm.S
+++ /dev/null
@@ -1,3627 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains implementations of NEON optimized pixel processing
- * functions. There is no full and detailed tutorial, but some functions
- * (those which are exposing some new or interesting features) are
- * extensively commented and can be used as examples.
- *
- * You may want to have a look at the comments for following functions:
- *  - pixman_composite_over_8888_0565_asm_neon
- *  - pixman_composite_over_n_8_0565_asm_neon
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-/*
- * Assembler setup for the whole file:
- *  - emit into the code section and accept NEON / ARMv7-A instructions;
- *  - '.object_arch armv4' together with the two '.eabi_attribute' lines
- *    keeps the object file from being tagged as requiring ARMv7 / NEON
- *    (presumably so it can be linked into a binary that selects this code
- *    at run time after detecting the CPU features);
- *  - assemble as ARM (not Thumb) code, enable the alternate macro syntax
- *    and align to a 4-byte boundary.
- */
-    .text
-    .fpu neon
-    .arch armv7a
-    .object_arch armv4
-    .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
-    .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
-    .arm
-    .altmacro
-    .p2align 2
-
-#include "pixman-private.h"
-#include "pixman-arm-asm.h"
-#include "pixman-arm-neon-asm.h"
-
-/* Global configuration options and preferences */
-
-/*
- * The code can optionally make use of unaligned memory accesses to improve
- * performance of handling leading/trailing pixels for each scanline.
- * The configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0, for
- * example on Linux, if unaligned memory accesses are not configured to
- * generate exceptions. It is set to 1 here.
- */
-.set RESPECT_STRICT_ALIGNMENT, 1
-
-/*
- * Set default prefetch type. There is a choice between the following options:
- *
- * PREFETCH_TYPE_NONE (may be useful for ARM cores where PLD is set to work
- * as a NOP to work around some HW bugs, or for whatever other reason)
- *
- * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
- * advanced prefetch introduces heavy overhead)
- *
- * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
- * which can run ARM and NEON instructions simultaneously so that extra ARM
- * instructions do not add (many) extra cycles, but improve prefetch efficiency)
- *
- * Note: some types of functions can't support advanced prefetch and fall back
- *       to the simple one (those which handle 24bpp pixels)
- */
-.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
-
-/* Prefetch distance, in pixels, used by the simple prefetch type
-   (PREFETCH_TYPE_SIMPLE) */
-.set PREFETCH_DISTANCE_SIMPLE, 64
-
-/*
- * Implementation of pixman_composite_over_8888_0565_asm_neon
- *
- * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
- * performs OVER compositing operation. Function fast_composite_over_8888_0565
- * from pixman-fast-path.c does the same in C and can be used as a reference.
- *
- * First we need to have some NEON assembly code which can do the actual
- * operation on the pixels and provide it to the template macro.
- *
- * Template macro quite conveniently takes care of emitting all the necessary
- * code for memory reading and writing (including quite tricky cases of
- * handling unaligned leading/trailing pixels), so we only need to deal with
- * the data in NEON registers.
- *
- * NEON registers allocation in general is recommended to be the following:
- * d0,  d1,  d2,  d3  - contain loaded source pixel data
- * d4,  d5,  d6,  d7  - contain loaded destination pixels (if they are needed)
- * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used)
- * d28, d29, d30, d31 - place for storing the result (destination pixels)
- *
- * As can be seen above, four 64-bit NEON registers are used for keeping
- * intermediate pixel data and up to 8 pixels can be processed in one step
- * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
- *
- * This particular function uses the following registers allocation:
- * d0,  d1,  d2,  d3  - contain loaded source pixel data
- * d4,  d5            - contain loaded destination pixels (they are needed)
- * d28, d29           - place for storing the result (destination pixels)
- */
-
-/*
- * Step one. We need to have some code to do some arithmetic on pixel data.
- * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
- * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
- * perform all the needed calculations and write the result to {d28, d29}.
- * The rationale for having two macros and not just one will be explained
- * later. In practice, any single monolithic function which does the work can
- * be split into two parts in any arbitrary way without affecting correctness.
- *
- * There is one special trick here too. Common template macro can optionally
- * make our life a bit easier by doing R, G, B, A color components
- * deinterleaving for 32bpp pixel formats (and this feature is used in
- * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
- * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we
- * actually use d0 register for blue channel (a vector of eight 8-bit
- * values), d1 register for green, d2 for red and d3 for alpha. This
- * simple conversion can be also done with a few NEON instructions:
- *
- * Packed to planar conversion:
- *  vuzp.8 d0, d1
- *  vuzp.8 d2, d3
- *  vuzp.8 d1, d3
- *  vuzp.8 d0, d2
- *
- * Planar to packed conversion:
- *  vzip.8 d0, d2
- *  vzip.8 d1, d3
- *  vzip.8 d2, d3
- *  vzip.8 d0, d1
- *
- * But pixel can be loaded directly in planar format using VLD4.8 NEON
- * instruction. It is 1 cycle slower than VLD1.32, so this is not always
- * desirable, that's why deinterleaving is optional.
- *
- * But anyway, here is the code:
- */
-/*
- * First half of the OVER step for 8 pixels: unpack the r5g6b5 destination
- * and scale every destination channel by the inverted source alpha.
- * In:  d3 - source alpha, {d4, d5} (q2) - eight r5g6b5 destination pixels.
- * Out: d20, d23, d22 - scaled destination red, green and blue.
- * d3 is left holding the inverted alpha; q2, q3, q12, q13 and q15 are
- * overwritten along the way.
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_head
-    /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
-       and put data into d6 - red, d7 - green, d30 - blue */
-    vshrn.u16   d6, q2, #8
-    vshrn.u16   d7, q2, #3
-    vsli.u16    q2, q2, #5
-    vsri.u8     d6, d6, #5
-    vmvn.8      d3, d3      /* invert source alpha */
-    vsri.u8     d7, d7, #6
-    vshrn.u16   d30, q2, #2
-    /* now do alpha blending: multiply each channel by the inverted alpha
-       and divide by 255 with rounding (vrshr + vraddhn), leaving
-       d20 - red, d23 - green, d22 - blue; the tail macro then adds the
-       source and produces d16 - red, d19 - green, d18 - blue */
-    vmull.u8    q10, d3, d6
-    vmull.u8    q11, d3, d7
-    vmull.u8    q12, d3, d30
-    vrshr.u16   q13, q10, #8
-    vrshr.u16   q3, q11, #8
-    vrshr.u16   q15, q12, #8
-    vraddhn.u16 d20, q10, q13
-    vraddhn.u16 d23, q11, q3
-    vraddhn.u16 d22, q12, q15
-.endm
-
-/*
- * Second half of the OVER step for 8 pixels: add the source channels
- * (d2 - red, {d0, d1} - blue, green) to the scaled destination left by
- * the head in d20 and {d22, d23}, then repack the sums as r5g6b5.
- * Out: {d28, d29} (q14) - eight r5g6b5 result pixels. q8 and q9 are
- * overwritten.
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail
-    /* ... continue alpha blending: saturating add of source and scaled
-       destination gives d16 - red, d19 - green, d18 - blue */
-    vqadd.u8    d16, d2, d20
-    vqadd.u8    q9, q0, q11
-    /* convert the result to r5g6b5 and store it into {d28, d29}: each
-       channel is moved to the top byte of a 16-bit lane, then green and
-       blue are shifted into place below red with vsri */
-    vshll.u8    q14, d16, #8
-    vshll.u8    q8, d19, #8
-    vshll.u8    q9, d18, #8
-    vsri.u16    q14, q8, #5
-    vsri.u16    q14, q9, #11
-.endm
-
-/*
- * OK, now we got almost everything that we need. Using the above two
- * macros, the work can be done right. But now we want to optimize
- * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
- * a lot from good code scheduling and software pipelining.
- *
- * Let's construct some code, which will run in the core main loop.
- * Some pseudo-code of the main loop will look like this:
- *   head
- *   while (...) {
- *     tail
- *     head
- *   }
- *   tail
- *
- * It may look a bit weird, but this setup allows to hide instruction
- * latencies better and also utilize dual-issue capability more
- * efficiently (make pairs of load-store and ALU instructions).
- *
- * So what we need now is a '*_tail_head' macro, which will be used
- * in the core main loop. A trivial straightforward implementation
- * of this macro would look like this:
- *
- *   pixman_composite_over_8888_0565_process_pixblock_tail
- *   vst1.16     {d28, d29}, [DST_W, :128]!
- *   vld1.16     {d4, d5}, [DST_R, :128]!
- *   vld4.32     {d0, d1, d2, d3}, [SRC]!
- *   pixman_composite_over_8888_0565_process_pixblock_head
- *   cache_preload 8, 8
- *
- * Now it also got some VLD/VST instructions. We simply can't move from
- * processing one block of pixels to the other one with just arithmetics.
- * The previously processed data needs to be written to memory and new
- * data needs to be fetched. Fortunately, this main loop does not deal
- * with partial leading/trailing pixels and can load/store a full block
- * of pixels in a bulk. Additionally, destination buffer is already
- * 16 bytes aligned here (which is good for performance).
- *
- * New things here are DST_R, DST_W, SRC and MASK identifiers. These
- * are the aliases for ARM registers which are used as pointers for
- * accessing data. We maintain separate pointers for reading and writing
- * destination buffer (DST_R and DST_W).
- *
- * Another new thing is 'cache_preload' macro. It is used for prefetching
- * data into CPU L2 cache and improve performance when dealing with large
- * images which are far larger than cache size. It uses one argument
- * (actually two, but they need to be the same here) - number of pixels
- * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
- * details about this macro. Moreover, if good performance is needed
- * the code from this macro needs to be copied into '*_tail_head' macro
- * and mixed with the rest of code for optimal instructions scheduling.
- * We are actually doing it below.
- *
- * Now after all the explanations, here is the optimized code.
- * Different instruction streams (originating from '*_head', '*_tail'
- * and 'cache_preload' macro) use different indentation levels for
- * better readability. Actually taking the code from one of these
- * indentation levels and ignoring a few VLD/VST instructions would
- * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
- * macro!
- */
-
-#if 1
-
-/*
- * Hand-scheduled main-loop body. Indentation marks the origin of each
- * instruction:
- *   4 spaces - the '_head' stream for the next block, plus the load of
- *              the next destination pixels and fetch_src_pixblock;
- *   8 spaces - the '_tail' stream for the previous block, plus the store
- *              of its result through DST_W;
- *   far right, prefixed with PF - the prefetch bookkeeping.
- * The two source-dependent vqadd of the tail are issued before
- * fetch_src_pixblock, so they still see the previous source pixels.
- * Keep this macro in sync with '_head' and '_tail' when changing either.
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
-        vqadd.u8    d16, d2, d20
-    vld1.16     {d4, d5}, [DST_R, :128]!
-        vqadd.u8    q9, q0, q11
-    vshrn.u16   d6, q2, #8
-    fetch_src_pixblock
-    vshrn.u16   d7, q2, #3
-    vsli.u16    q2, q2, #5
-        vshll.u8    q14, d16, #8
-                                    PF add PF_X, PF_X, #8
-        vshll.u8    q8, d19, #8
-                                    PF tst PF_CTL, #0xF
-    vsri.u8     d6, d6, #5
-                                    PF addne PF_X, PF_X, #8
-    vmvn.8      d3, d3
-                                    PF subne PF_CTL, PF_CTL, #1
-    vsri.u8     d7, d7, #6
-    vshrn.u16   d30, q2, #2
-    vmull.u8    q10, d3, d6
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-    vmull.u8    q11, d3, d7
-    vmull.u8    q12, d3, d30
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-        vsri.u16    q14, q8, #5
-                                    PF cmp PF_X, ORIG_W
-        vshll.u8    q9, d18, #8
-    vrshr.u16   q13, q10, #8
-                                    PF subge PF_X, PF_X, ORIG_W
-    vrshr.u16   q3, q11, #8
-    vrshr.u16   q15, q12, #8
-                                    PF subges PF_CTL, PF_CTL, #0x10
-        vsri.u16    q14, q9, #11
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-    vraddhn.u16 d20, q10, q13
-    vraddhn.u16 d23, q11, q3
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vraddhn.u16 d22, q12, q15
-        vst1.16     {d28, d29}, [DST_W, :128]!
-.endm
-
-#else
-
-/*
- * If we did not care much about the performance, we would just use this...
- * Unscheduled reference variant: finish the previous block, store it,
- * load the next destination and source pixels, start the next block and
- * issue the prefetch. It is compiled out by the '#if 1' above.
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
-    pixman_composite_over_8888_0565_process_pixblock_tail
-    vst1.16     {d28, d29}, [DST_W, :128]!
-    vld1.16     {d4, d5}, [DST_R, :128]!
-    fetch_src_pixblock
-    pixman_composite_over_8888_0565_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-#endif
-
-/*
- * And now the final part. We are using 'generate_composite_function' macro
- * to put all the stuff together. We are specifying the name of the function
- * which we want to get, number of bits per pixel for the source, mask and
- * destination (0 if unused, like mask in this case). Next come some bit
- * flags:
- *   FLAG_DST_READWRITE      - tells that the destination buffer is both read
- *                             and written, for write-only buffer we would use
- *                             FLAG_DST_WRITEONLY flag instead
- *   FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
- *                             and separate color channels for 32bpp format.
- * The next things are:
- *  - the number of pixels processed per iteration (8 in this case, because
- *    that's the maximum what can fit into four 64-bit NEON registers).
- *  - prefetch distance, measured in pixel blocks. In this case it is 5 times
- *    by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
- *    prefetch distance can be selected by running some benchmarks.
- *
- * After that we specify some macros, these are 'default_init',
- * 'default_cleanup' here which are empty (but it is possible to have custom
- * init/cleanup macros to be able to save/restore some extra NEON registers
- * like d8-d15 or do anything else) followed by
- * 'pixman_composite_over_8888_0565_process_pixblock_head',
- * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
- * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
- * which we got implemented above.
- *
- * The last part is the NEON registers allocation scheme.
- */
-/*
- * Instantiate pixman_composite_over_8888_0565_asm_neon: 32bpp source, no
- * mask (0), 16bpp destination that is both read and written, with the
- * 32bpp data deinterleaved into planar form.
- */
-generate_composite_function \
-    pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_0565_process_pixblock_head, \
-    pixman_composite_over_8888_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-/*
- * First half of the OVER step for a solid source and 8 r5g6b5 destination
- * pixels. Same as the over_8888_0565 head except that d3 is not inverted
- * here: the init macro of this function already stores the inverted
- * source alpha in d3.
- * In:  d3 - inverted source alpha, {d4, d5} (q2) - destination pixels.
- * Out: d20, d23, d22 - scaled destination red, green and blue.
- */
-.macro pixman_composite_over_n_0565_process_pixblock_head
-    /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
-       and put data into d6 - red, d7 - green, d30 - blue */
-    vshrn.u16   d6, q2, #8
-    vshrn.u16   d7, q2, #3
-    vsli.u16    q2, q2, #5
-    vsri.u8     d6, d6, #5
-    vsri.u8     d7, d7, #6
-    vshrn.u16   d30, q2, #2
-    /* now do alpha blending: multiply each channel by the inverted alpha
-       and divide by 255 with rounding (vrshr + vraddhn), leaving
-       d20 - red, d23 - green, d22 - blue; the tail macro then adds the
-       source and produces d16 - red, d19 - green, d18 - blue */
-    vmull.u8    q10, d3, d6
-    vmull.u8    q11, d3, d7
-    vmull.u8    q12, d3, d30
-    vrshr.u16   q13, q10, #8
-    vrshr.u16   q3, q11, #8
-    vrshr.u16   q15, q12, #8
-    vraddhn.u16 d20, q10, q13
-    vraddhn.u16 d23, q11, q3
-    vraddhn.u16 d22, q12, q15
-.endm
-
-/*
- * Second half of the OVER step for a solid source: add the source channels
- * held in d2 and {d0, d1} to the scaled destination left by the head in
- * d20 and {d22, d23}, then repack the sums as r5g6b5.
- * Out: {d28, d29} (q14) - eight r5g6b5 result pixels. q8 and q9 are
- * overwritten; d0-d3 are only read, so the solid colour survives.
- */
-.macro pixman_composite_over_n_0565_process_pixblock_tail
-    /* ... continue alpha blending: saturating add of source and scaled
-       destination gives d16 - red, d19 - green, d18 - blue */
-    vqadd.u8    d16, d2, d20
-    vqadd.u8    q9, q0, q11
-    /* convert the result to r5g6b5 and store it into {d28, d29} */
-    vshll.u8    q14, d16, #8
-    vshll.u8    q8, d19, #8
-    vshll.u8    q9, d18, #8
-    vsri.u16    q14, q8, #5
-    vsri.u16    q14, q9, #11
-.endm
-
-/*
- * Main-loop body, not hand-scheduled: finish the previous block, load the
- * next eight destination pixels through DST_R, store the finished block
- * through DST_W, start the next block and issue the prefetch. There is no
- * source fetch because the solid colour stays in d0-d3.
- *
- * TODO: expand macros and do better instructions scheduling
- */
-.macro pixman_composite_over_n_0565_process_pixblock_tail_head
-    pixman_composite_over_n_0565_process_pixblock_tail
-    vld1.16     {d4, d5}, [DST_R, :128]!
-    vst1.16     {d28, d29}, [DST_W, :128]!
-    pixman_composite_over_n_0565_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-/*
- * Init macro: load the 32-bit solid source colour from the stack argument
- * area (sp + ARGS_STACK_OFFSET) and replicate each of its four bytes
- * across one register: byte 0 -> d0, byte 1 -> d1, byte 2 -> d2,
- * byte 3 -> d3 (blue, green, red, alpha in the planar layout used by this
- * file). d3 is both the staging register and the last destination, so it
- * must be splatted last. The alpha is inverted once here instead of once
- * per block.
- */
-.macro pixman_composite_over_n_0565_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d3[0]}, [DUMMY]
-    vdup.8      d0, d3[0]
-    vdup.8      d1, d3[1]
-    vdup.8      d2, d3[2]
-    vdup.8      d3, d3[3]
-    vmvn.8      d3, d3      /* invert source alpha */
-.endm
-
-/*
- * Instantiate pixman_composite_over_n_0565_asm_neon: the source and mask
- * bpp are both 0 (the solid colour is set up by the custom init macro
- * instead of being fetched per block), the destination is 16bpp and is
- * both read and written.
- */
-generate_composite_function \
-    pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_0565_init, \
-    default_cleanup, \
-    pixman_composite_over_n_0565_process_pixblock_head, \
-    pixman_composite_over_n_0565_process_pixblock_tail, \
-    pixman_composite_over_n_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_0565_process_pixblock_head
-    vshll.u8    q8, d1, #8
-    vshll.u8    q14, d2, #8
-    vshll.u8    q9, d0, #8
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail
-    vsri.u16    q14, q8, #5
-    vsri.u16    q14, q9, #11
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
-        vsri.u16    q14, q8, #5
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-    fetch_src_pixblock
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vsri.u16    q14, q9, #11
-                                    PF cmp PF_X, ORIG_W
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-    vshll.u8    q8, d1, #8
-        vst1.16     {d28, d29}, [DST_W, :128]!
-                                    PF subge PF_X, PF_X, ORIG_W
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vshll.u8    q14, d2, #8
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-    vshll.u8    q9, d0, #8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_0565_process_pixblock_head, \
-    pixman_composite_src_8888_0565_process_pixblock_tail, \
-    pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_8888_process_pixblock_head
-    vshrn.u16   d30, q0, #8
-    vshrn.u16   d29, q0, #3
-    vsli.u16    q0, q0, #5
-    vmov.u8     d31, #255
-    vsri.u8     d30, d30, #5
-    vsri.u8     d29, d29, #6
-    vshrn.u16   d28, q0, #2
-.endm
-
-.macro pixman_composite_src_0565_8888_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
-    pixman_composite_src_0565_8888_process_pixblock_tail
-    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-    fetch_src_pixblock
-    pixman_composite_src_0565_8888_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_8888_process_pixblock_head, \
-    pixman_composite_src_0565_8888_process_pixblock_tail, \
-    pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_process_pixblock_head
-    vqadd.u8    q14, q0, q2
-    vqadd.u8    q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail_head
-    fetch_src_pixblock
-                                    PF add PF_X, PF_X, #32
-                                    PF tst PF_CTL, #0xF
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-                                    PF addne PF_X, PF_X, #32
-                                    PF subne PF_CTL, PF_CTL, #1
-        vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF cmp PF_X, ORIG_W
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-                                    PF subge PF_X, PF_X, ORIG_W
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vqadd.u8    q14, q0, q2
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vqadd.u8    q15, q1, q3
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-    vld1.32     {d4, d5, d6, d7}, [DST_R, :128]!
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vst1.32     {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF cmp PF_X, ORIG_W
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-                                    PF subge PF_X, PF_X, ORIG_W
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vqadd.u8    q14, q0, q2
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vqadd.u8    q15, q1, q3
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head
-    vmvn.8      d24, d3  /* get inverted alpha */
-    /* do alpha blending */
-    vmull.u8    q8, d24, d4
-    vmull.u8    q9, d24, d5
-    vmull.u8    q10, d24, d6
-    vmull.u8    q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q12, q10
-    vraddhn.u16 d31, q13, q11
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-        vrshr.u16   q14, q8, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        vrshr.u16   q15, q9, #8
-        vrshr.u16   q12, q10, #8
-        vrshr.u16   q13, q11, #8
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d28, q14, q8
-        vraddhn.u16 d29, q15, q9
-                                    PF cmp PF_X, ORIG_W
-        vraddhn.u16 d30, q12, q10
-        vraddhn.u16 d31, q13, q11
-    fetch_src_pixblock
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-    vmvn.8      d22, d3
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q8, d22, d4
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q9, d22, d5
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-    vmull.u8    q10, d22, d6
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vmull.u8    q11, d22, d7
-.endm
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8888_process_pixblock_head
-    pixman_composite_out_reverse_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail
-    vqadd.u8    q14, q0, q14
-    vqadd.u8    q15, q1, q15
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-        vrshr.u16   q14, q8, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        vrshr.u16   q15, q9, #8
-        vrshr.u16   q12, q10, #8
-        vrshr.u16   q13, q11, #8
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d28, q14, q8
-        vraddhn.u16 d29, q15, q9
-                                    PF cmp PF_X, ORIG_W
-        vraddhn.u16 d30, q12, q10
-        vraddhn.u16 d31, q13, q11
-        vqadd.u8    q14, q0, q14
-        vqadd.u8    q15, q1, q15
-    fetch_src_pixblock
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-    vmvn.8      d22, d3
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q8, d22, d4
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q9, d22, d5
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-    vmull.u8    q10, d22, d6
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vmull.u8    q11, d22, d7
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_process_pixblock_head
-    /* deinterleaved source pixels in {d0, d1, d2, d3} */
-    /* inverted alpha in {d24} */
-    /* destination pixels in {d4, d5, d6, d7} */
-    vmull.u8    q8, d24, d4
-    vmull.u8    q9, d24, d5
-    vmull.u8    q10, d24, d6
-    vmull.u8    q11, d24, d7
-.endm
-
-.macro pixman_composite_over_n_8888_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q2, q10, #8
-    vrshr.u16   q3, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q2, q10
-    vraddhn.u16 d31, q3, q11
-    vqadd.u8    q14, q0, q14
-    vqadd.u8    q15, q1, q15
-.endm
-
-.macro pixman_composite_over_n_8888_process_pixblock_tail_head
-        vrshr.u16   q14, q8, #8
-        vrshr.u16   q15, q9, #8
-        vrshr.u16   q2, q10, #8
-        vrshr.u16   q3, q11, #8
-        vraddhn.u16 d28, q14, q8
-        vraddhn.u16 d29, q15, q9
-        vraddhn.u16 d30, q2, q10
-        vraddhn.u16 d31, q3, q11
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-        vqadd.u8    q14, q0, q14
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0x0F
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vqadd.u8    q15, q1, q15
-                                    PF cmp PF_X, ORIG_W
-    vmull.u8    q8, d24, d4
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-    vmull.u8    q9, d24, d5
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q10, d24, d6
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q11, d24, d7
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_n_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d3[0]}, [DUMMY]
-    vdup.8      d0, d3[0]
-    vdup.8      d1, d3[1]
-    vdup.8      d2, d3[2]
-    vdup.8      d3, d3[3]
-    vmvn.8      d24, d3  /* get inverted alpha */
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
-        vrshr.u16   q14, q8, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        vrshr.u16   q15, q9, #8
-        vrshr.u16   q12, q10, #8
-        vrshr.u16   q13, q11, #8
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d28, q14, q8
-        vraddhn.u16 d29, q15, q9
-                                    PF cmp PF_X, ORIG_W
-        vraddhn.u16 d30, q12, q10
-        vraddhn.u16 d31, q13, q11
-        vqadd.u8    q14, q0, q14
-        vqadd.u8    q15, q1, q15
-    vld4.8      {d0, d1, d2, d3}, [DST_R, :128]!
-    vmvn.8      d22, d3
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q8, d22, d4
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q9, d22, d5
-    vmull.u8    q10, d22, d6
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vmull.u8    q11, d22, d7
-.endm
-
-.macro pixman_composite_over_reverse_n_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d7[0]}, [DUMMY]
-    vdup.8      d4, d7[0]
-    vdup.8      d5, d7[1]
-    vdup.8      d6, d7[2]
-    vdup.8      d7, d7[3]
-.endm
-
-generate_composite_function \
-    pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_reverse_n_8888_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0,  /* dst_r_basereg */ \
-    4,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_head
-    vmull.u8    q0,  d24, d8    /* IN for SRC pixels (part1) */
-    vmull.u8    q1,  d24, d9
-    vmull.u8    q6,  d24, d10
-    vmull.u8    q7,  d24, d11
-        vshrn.u16   d6,  q2, #8 /* convert DST_R data to 32-bpp (part1) */
-        vshrn.u16   d7,  q2, #3
-        vsli.u16    q2,  q2, #5
-    vrshr.u16   q8,  q0,  #8    /* IN for SRC pixels (part2) */
-    vrshr.u16   q9,  q1,  #8
-    vrshr.u16   q10, q6,  #8
-    vrshr.u16   q11, q7,  #8
-    vraddhn.u16 d0,  q0,  q8
-    vraddhn.u16 d1,  q1,  q9
-    vraddhn.u16 d2,  q6,  q10
-    vraddhn.u16 d3,  q7,  q11
-        vsri.u8     d6,  d6, #5 /* convert DST_R data to 32-bpp (part2) */
-        vsri.u8     d7,  d7, #6
-    vmvn.8      d3,  d3
-        vshrn.u16   d30, q2, #2
-    vmull.u8    q8,  d3, d6     /* now do alpha blending */
-    vmull.u8    q9,  d3, d7
-    vmull.u8    q10, d3, d30
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
-    /* 3 cycle bubble (after vmull.u8) */
-    vrshr.u16   q13, q8,  #8
-    vrshr.u16   q11, q9,  #8
-    vrshr.u16   q15, q10, #8
-    vraddhn.u16 d16, q8,  q13
-    vraddhn.u16 d27, q9,  q11
-    vraddhn.u16 d26, q10, q15
-    vqadd.u8    d16, d2,  d16
-    /* 1 cycle bubble */
-    vqadd.u8    q9,  q0,  q13
-    vshll.u8    q14, d16, #8    /* convert to 16bpp */
-    vshll.u8    q8,  d19, #8
-    vshll.u8    q9,  d18, #8
-    vsri.u16    q14, q8,  #5
-    /* 1 cycle bubble */
-    vsri.u16    q14, q9,  #11
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-    vld1.16     {d4, d5}, [DST_R, :128]!
-    vshrn.u16   d6,  q2,  #8
-    fetch_mask_pixblock
-    vshrn.u16   d7,  q2,  #3
-    fetch_src_pixblock
-    vmull.u8    q6,  d24, d10
-        vrshr.u16   q13, q8,  #8
-        vrshr.u16   q11, q9,  #8
-        vrshr.u16   q15, q10, #8
-        vraddhn.u16 d16, q8,  q13
-        vraddhn.u16 d27, q9,  q11
-        vraddhn.u16 d26, q10, q15
-        vqadd.u8    d16, d2,  d16
-    vmull.u8    q1,  d24, d9
-        vqadd.u8    q9,  q0,  q13
-        vshll.u8    q14, d16, #8
-    vmull.u8    q0,  d24, d8
-        vshll.u8    q8,  d19, #8
-        vshll.u8    q9,  d18, #8
-        vsri.u16    q14, q8,  #5
-    vmull.u8    q7,  d24, d11
-        vsri.u16    q14, q9,  #11
-
-    cache_preload 8, 8
-
-    vsli.u16    q2,  q2,  #5
-    vrshr.u16   q8,  q0,  #8
-    vrshr.u16   q9,  q1,  #8
-    vrshr.u16   q10, q6,  #8
-    vrshr.u16   q11, q7,  #8
-    vraddhn.u16 d0,  q0,  q8
-    vraddhn.u16 d1,  q1,  q9
-    vraddhn.u16 d2,  q6,  q10
-    vraddhn.u16 d3,  q7,  q11
-    vsri.u8     d6,  d6,  #5
-    vsri.u8     d7,  d7,  #6
-    vmvn.8      d3,  d3
-    vshrn.u16   d30, q2,  #2
-    vst1.16     {d28, d29}, [DST_W, :128]!
-    vmull.u8    q8,  d3,  d6
-    vmull.u8    q9,  d3,  d7
-    vmull.u8    q10, d3,  d30
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-/*
- * This function needs a special initialization of solid mask.
- * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
- * offset, split into color components and replicated in d8-d11
- * registers. Additionally, this function needs all the NEON registers,
- * so it has to save d8-d15 registers which are callee saved according
- * to ABI. These registers are restored from 'cleanup' macro. All the
- * other NEON registers are caller saved, so can be clobbered freely
- * without introducing any problems.
- */
-.macro pixman_composite_over_n_8_0565_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d11[0]}, [DUMMY]
-    vdup.8      d8, d11[0]
-    vdup.8      d9, d11[1]
-    vdup.8      d10, d11[2]
-    vdup.8      d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_0565_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_0565_init, \
-    pixman_composite_over_n_8_0565_cleanup, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_0565_init
-    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
-    vpush       {d8-d15}
-    vld1.32     {d24[0]}, [DUMMY]
-    vdup.8      d24, d24[3]
-.endm
-
-.macro pixman_composite_over_8888_n_0565_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_8888_n_0565_init, \
-    pixman_composite_over_8888_n_0565_cleanup, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
-    vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
-    fetch_src_pixblock
-    cache_preload 16, 16
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    16, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_0565_process_pixblock_head, \
-    pixman_composite_src_0565_0565_process_pixblock_tail, \
-    pixman_composite_src_0565_0565_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail_head
-    vst1.8  {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d0[0]}, [DUMMY]
-    vsli.u64    d0, d0, #8
-    vsli.u64    d0, d0, #16
-    vsli.u64    d0, d0, #32
-    vorr        d1, d0, d0
-    vorr        q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
-    FLAG_DST_WRITEONLY, \
-    32, /* number of pixels, processed in a single block */ \
-    0,  /* prefetch distance */ \
-    pixman_composite_src_n_8_init, \
-    pixman_composite_src_n_8_cleanup, \
-    pixman_composite_src_n_8_process_pixblock_head, \
-    pixman_composite_src_n_8_process_pixblock_tail, \
-    pixman_composite_src_n_8_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail_head
-    vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_0565_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d0[0]}, [DUMMY]
-    vsli.u64    d0, d0, #16
-    vsli.u64    d0, d0, #32
-    vorr        d1, d0, d0
-    vorr        q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_0565_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    16, /* number of pixels, processed in a single block */ \
-    0,  /* prefetch distance */ \
-    pixman_composite_src_n_0565_init, \
-    pixman_composite_src_n_0565_cleanup, \
-    pixman_composite_src_n_0565_process_pixblock_head, \
-    pixman_composite_src_n_0565_process_pixblock_tail, \
-    pixman_composite_src_n_0565_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail_head
-    vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d0[0]}, [DUMMY]
-    vsli.u64    d0, d0, #32
-    vorr        d1, d0, d0
-    vorr        q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    0, /* prefetch distance */ \
-    pixman_composite_src_n_8888_init, \
-    pixman_composite_src_n_8888_cleanup, \
-    pixman_composite_src_n_8888_process_pixblock_head, \
-    pixman_composite_src_n_8888_process_pixblock_tail, \
-    pixman_composite_src_n_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
-    vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-    fetch_src_pixblock
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_8888_process_pixblock_head, \
-    pixman_composite_src_8888_8888_process_pixblock_tail, \
-    pixman_composite_src_8888_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_x888_8888_process_pixblock_head
-    vorr     q0, q0, q2
-    vorr     q1, q1, q2
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
-    vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-    fetch_src_pixblock
-    vorr     q0, q0, q2
-    vorr     q1, q1, q2
-    cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_x888_8888_init
-    vmov.u8  q2, #0xFF
-    vshl.u32 q2, q2, #24
-.endm
-
-generate_composite_function \
-    pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    pixman_composite_src_x888_8888_init, \
-    default_cleanup, \
-    pixman_composite_src_x888_8888_process_pixblock_head, \
-    pixman_composite_src_x888_8888_process_pixblock_tail, \
-    pixman_composite_src_x888_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_head
-    /* expecting solid source in {d0, d1, d2, d3} */
-    /* mask is in d24 (d25, d26, d27 are unused) */
-
-    /* in */
-    vmull.u8    q8, d24, d0
-    vmull.u8    q9, d24, d1
-    vmull.u8    q10, d24, d2
-    vmull.u8    q11, d24, d3
-    vrsra.u16   q8, q8, #8
-    vrsra.u16   q9, q9, #8
-    vrsra.u16   q10, q10, #8
-    vrsra.u16   q11, q11, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_tail
-    vrshrn.u16  d28, q8, #8
-    vrshrn.u16  d29, q9, #8
-    vrshrn.u16  d30, q10, #8
-    vrshrn.u16  d31, q11, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head
-    fetch_mask_pixblock
-                                    PF add PF_X, PF_X, #8
-        vrshrn.u16  d28, q8, #8
-                                    PF tst PF_CTL, #0x0F
-        vrshrn.u16  d29, q9, #8
-                                    PF addne PF_X, PF_X, #8
-        vrshrn.u16  d30, q10, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vrshrn.u16  d31, q11, #8
-                                    PF cmp PF_X, ORIG_W
-    vmull.u8    q8, d24, d0
-                                    PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
-    vmull.u8    q9, d24, d1
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q10, d24, d2
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q11, d24, d3
-                                    PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vrsra.u16   q8, q8, #8
-    vrsra.u16   q9, q9, #8
-    vrsra.u16   q10, q10, #8
-    vrsra.u16   q11, q11, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d3[0]}, [DUMMY]
-    vdup.8      d0, d3[0]
-    vdup.8      d1, d3[1]
-    vdup.8      d2, d3[2]
-    vdup.8      d3, d3[3]
-.endm
-
-.macro pixman_composite_src_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_src_n_8_8888_init, \
-    pixman_composite_src_n_8_8888_cleanup, \
-    pixman_composite_src_n_8_8888_process_pixblock_head, \
-    pixman_composite_src_n_8_8888_process_pixblock_tail, \
-    pixman_composite_src_n_8_8888_process_pixblock_tail_head, \
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_8_process_pixblock_head
-    vmull.u8    q0, d24, d16
-    vmull.u8    q1, d25, d16
-    vmull.u8    q2, d26, d16
-    vmull.u8    q3, d27, d16
-    vrsra.u16   q0, q0,  #8
-    vrsra.u16   q1, q1,  #8
-    vrsra.u16   q2, q2,  #8
-    vrsra.u16   q3, q3,  #8
-.endm
-
-.macro pixman_composite_src_n_8_8_process_pixblock_tail
-    vrshrn.u16  d28, q0, #8
-    vrshrn.u16  d29, q1, #8
-    vrshrn.u16  d30, q2, #8
-    vrshrn.u16  d31, q3, #8
-.endm
-
-.macro pixman_composite_src_n_8_8_process_pixblock_tail_head
-    fetch_mask_pixblock
-                                    PF add PF_X, PF_X, #8
-        vrshrn.u16  d28, q0, #8
-                                    PF tst PF_CTL, #0x0F
-        vrshrn.u16  d29, q1, #8
-                                    PF addne PF_X, PF_X, #8
-        vrshrn.u16  d30, q2, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vrshrn.u16  d31, q3, #8
-                                    PF cmp PF_X, ORIG_W
-    vmull.u8    q0,  d24, d16
-                                    PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
-    vmull.u8    q1,  d25, d16
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q2,  d26, d16
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q3,  d27, d16
-                                    PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-        vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vrsra.u16   q0, q0,  #8
-    vrsra.u16   q1, q1,  #8
-    vrsra.u16   q2, q2,  #8
-    vrsra.u16   q3, q3,  #8
-.endm
-
-.macro pixman_composite_src_n_8_8_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d16[0]}, [DUMMY]
-    vdup.8      d16, d16[3]
-.endm
-
-.macro pixman_composite_src_n_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_WRITEONLY, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_src_n_8_8_init, \
-    pixman_composite_src_n_8_8_cleanup, \
-    pixman_composite_src_n_8_8_process_pixblock_head, \
-    pixman_composite_src_n_8_8_process_pixblock_tail, \
-    pixman_composite_src_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_head
-    /* expecting deinterleaved source data in {d8, d9, d10, d11} */
-    /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
-    /* and destination data in {d4, d5, d6, d7} */
-    /* mask is in d24 (d25, d26, d27 are unused) */
-
-    /* in */
-    vmull.u8    q6, d24, d8
-    vmull.u8    q7, d24, d9
-    vmull.u8    q8, d24, d10
-    vmull.u8    q9, d24, d11
-    vrshr.u16   q10, q6, #8
-    vrshr.u16   q11, q7, #8
-    vrshr.u16   q12, q8, #8
-    vrshr.u16   q13, q9, #8
-    vraddhn.u16 d0, q6, q10
-    vraddhn.u16 d1, q7, q11
-    vraddhn.u16 d2, q8, q12
-    vraddhn.u16 d3, q9, q13
-    vmvn.8      d25, d3  /* get inverted alpha */
-    /* source:      d0 - blue, d1 - green, d2 - red, d3 - alpha */
-    /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
-    /* now do alpha blending */
-    vmull.u8    q8, d25, d4
-    vmull.u8    q9, d25, d5
-    vmull.u8    q10, d25, d6
-    vmull.u8    q11, d25, d7
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q6, q10, #8
-    vrshr.u16   q7, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q6, q10
-    vraddhn.u16 d31, q7, q11
-    vqadd.u8    q14, q0, q14
-    vqadd.u8    q15, q1, q15
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
-        vrshr.u16   q14, q8, #8
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-        vrshr.u16   q15, q9, #8
-    fetch_mask_pixblock
-        vrshr.u16   q6, q10, #8
-                                    PF add PF_X, PF_X, #8
-        vrshr.u16   q7, q11, #8
-                                    PF tst PF_CTL, #0x0F
-        vraddhn.u16 d28, q14, q8
-                                    PF addne PF_X, PF_X, #8
-        vraddhn.u16 d29, q15, q9
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d30, q6, q10
-                                    PF cmp PF_X, ORIG_W
-        vraddhn.u16 d31, q7, q11
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-    vmull.u8    q6, d24, d8
-                                    PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
-    vmull.u8    q7, d24, d9
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmull.u8    q8, d24, d10
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vmull.u8    q9, d24, d11
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-        vqadd.u8    q14, q0, q14
-                                    PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-        vqadd.u8    q15, q1, q15
-    vrshr.u16   q10, q6, #8
-    vrshr.u16   q11, q7, #8
-    vrshr.u16   q12, q8, #8
-    vrshr.u16   q13, q9, #8
-    vraddhn.u16 d0, q6, q10
-    vraddhn.u16 d1, q7, q11
-    vraddhn.u16 d2, q8, q12
-    vraddhn.u16 d3, q9, q13
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vmvn.8      d25, d3
-    vmull.u8    q8, d25, d4
-    vmull.u8    q9, d25, d5
-    vmull.u8    q10, d25, d6
-    vmull.u8    q11, d25, d7
-.endm
-
-.macro pixman_composite_over_n_8_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d11[0]}, [DUMMY]
-    vdup.8      d8, d11[0]
-    vdup.8      d9, d11[1]
-    vdup.8      d10, d11[2]
-    vdup.8      d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_8888_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_8888_init, \
-    pixman_composite_over_n_8_8888_cleanup, \
-    pixman_composite_over_n_8_8888_process_pixblock_head, \
-    pixman_composite_over_n_8_8888_process_pixblock_tail, \
-    pixman_composite_over_n_8_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8_process_pixblock_head
-    vmull.u8    q0,  d24, d8
-    vmull.u8    q1,  d25, d8
-    vmull.u8    q6,  d26, d8
-    vmull.u8    q7,  d27, d8
-    vrshr.u16   q10, q0,  #8
-    vrshr.u16   q11, q1,  #8
-    vrshr.u16   q12, q6,  #8
-    vrshr.u16   q13, q7,  #8
-    vraddhn.u16 d0,  q0,  q10
-    vraddhn.u16 d1,  q1,  q11
-    vraddhn.u16 d2,  q6,  q12
-    vraddhn.u16 d3,  q7,  q13
-    vmvn.8      q12, q0
-    vmvn.8      q13, q1
-    vmull.u8    q8,  d24, d4
-    vmull.u8    q9,  d25, d5
-    vmull.u8    q10, d26, d6
-    vmull.u8    q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8_8_process_pixblock_tail
-    vrshr.u16   q14, q8,  #8
-    vrshr.u16   q15, q9,  #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q12, q10
-    vraddhn.u16 d31, q13, q11
-    vqadd.u8    q14, q0,  q14
-    vqadd.u8    q15, q1,  q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    pixman_composite_over_n_8_8_process_pixblock_tail
-    fetch_mask_pixblock
-    cache_preload 32, 32
-    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    pixman_composite_over_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d8[0]}, [DUMMY]
-    vdup.8      d8, d8[3]
-.endm
-
-.macro pixman_composite_over_n_8_8_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_8_init, \
-    pixman_composite_over_n_8_8_cleanup, \
-    pixman_composite_over_n_8_8_process_pixblock_head, \
-    pixman_composite_over_n_8_8_process_pixblock_tail, \
-    pixman_composite_over_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
-    /*
-     * 'combine_mask_ca' replacement
-     *
-     * input:  solid src (n) in {d8,  d9,  d10, d11}
-     *         dest in          {d4,  d5,  d6,  d7 }
-     *         mask in          {d24, d25, d26, d27}
-     * output: updated src in   {d0,  d1,  d2,  d3 }
-     *         updated mask in  {d24, d25, d26, d3 }
-     */
-    vmull.u8    q0,  d24, d8
-    vmull.u8    q1,  d25, d9
-    vmull.u8    q6,  d26, d10
-    vmull.u8    q7,  d27, d11
-    vmull.u8    q9,  d11, d25
-    vmull.u8    q12, d11, d24
-    vmull.u8    q13, d11, d26
-    vrshr.u16   q8,  q0,  #8
-    vrshr.u16   q10, q1,  #8
-    vrshr.u16   q11, q6,  #8
-    vraddhn.u16 d0,  q0,  q8
-    vraddhn.u16 d1,  q1,  q10
-    vraddhn.u16 d2,  q6,  q11
-    vrshr.u16   q11, q12, #8
-    vrshr.u16   q8,  q9,  #8
-    vrshr.u16   q6,  q13, #8
-    vrshr.u16   q10, q7,  #8
-    vraddhn.u16 d24, q12, q11
-    vraddhn.u16 d25, q9,  q8
-    vraddhn.u16 d26, q13, q6
-    vraddhn.u16 d3,  q7,  q10
-    /*
-     * 'combine_over_ca' replacement
-     *
-     * output: updated dest in {d28, d29, d30, d31}
-     */
-    vmvn.8      q12, q12
-    vmvn.8      d26, d26
-    vmull.u8    q8,  d24, d4
-    vmull.u8    q9,  d25, d5
-    vmvn.8      d27, d3
-    vmull.u8    q10, d26, d6
-    vmull.u8    q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
-    /* ... continue 'combine_over_ca' replacement */
-    vrshr.u16   q14, q8,  #8
-    vrshr.u16   q15, q9,  #8
-    vrshr.u16   q6,  q10, #8
-    vrshr.u16   q7,  q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q6,  q10
-    vraddhn.u16 d31, q7,  q11
-    vqadd.u8    q14, q0,  q14
-    vqadd.u8    q15, q1,  q15
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-        vrshr.u16   q14, q8, #8
-        vrshr.u16   q15, q9, #8
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-        vrshr.u16   q6, q10, #8
-        vrshr.u16   q7, q11, #8
-        vraddhn.u16 d28, q14, q8
-        vraddhn.u16 d29, q15, q9
-        vraddhn.u16 d30, q6, q10
-        vraddhn.u16 d31, q7, q11
-    fetch_mask_pixblock
-        vqadd.u8    q14, q0, q14
-        vqadd.u8    q15, q1, q15
-    cache_preload 8, 8
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_head
-    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d11[0]}, [DUMMY]
-    vdup.8      d8, d11[0]
-    vdup.8      d9, d11[1]
-    vdup.8      d10, d11[2]
-    vdup.8      d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_8888_ca_init, \
-    pixman_composite_over_n_8888_8888_ca_cleanup, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head
-    /*
-     * 'combine_mask_ca' replacement
-     *
-     * input:  solid src (n) in {d8,  d9,  d10, d11}  [B, G, R, A]
-     *         mask in          {d24, d25, d26}       [B, G, R]
-     * output: updated src in   {d0,  d1,  d2 }       [B, G, R]
-     *         updated mask in  {d24, d25, d26}       [B, G, R]
-     */
-    vmull.u8    q0,  d24, d8
-    vmull.u8    q1,  d25, d9
-    vmull.u8    q6,  d26, d10
-    vmull.u8    q9,  d11, d25
-    vmull.u8    q12, d11, d24
-    vmull.u8    q13, d11, d26
-    vrshr.u16   q8,  q0,  #8
-    vrshr.u16   q10, q1,  #8
-    vrshr.u16   q11, q6,  #8
-    vraddhn.u16 d0,  q0,  q8
-    vraddhn.u16 d1,  q1,  q10
-    vraddhn.u16 d2,  q6,  q11
-    vrshr.u16   q11, q12, #8
-    vrshr.u16   q8,  q9,  #8
-    vrshr.u16   q6,  q13, #8
-    vraddhn.u16 d24, q12, q11
-    vraddhn.u16 d25, q9,  q8
-    /*
-     * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
-     * and put data into d16 - blue, d17 - green, d18 - red
-     */
-       vshrn.u16   d17, q2,  #3
-       vshrn.u16   d18, q2,  #8
-    vraddhn.u16 d26, q13, q6
-       vsli.u16    q2,  q2,  #5
-       vsri.u8     d18, d18, #5
-       vsri.u8     d17, d17, #6
-    /*
-     * 'combine_over_ca' replacement
-     *
-     * output: updated dest in d16 - blue, d17 - green, d18 - red
-     */
-    vmvn.8      q12, q12
-       vshrn.u16   d16, q2,  #2
-    vmvn.8      d26, d26
-    vmull.u8    q6,  d16, d24
-    vmull.u8    q7,  d17, d25
-    vmull.u8    q11, d18, d26
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail
-    /* ... continue 'combine_over_ca' replacement */
-    vrshr.u16   q10, q6,  #8
-    vrshr.u16   q14, q7,  #8
-    vrshr.u16   q15, q11, #8
-    vraddhn.u16 d16, q10, q6
-    vraddhn.u16 d17, q14, q7
-    vraddhn.u16 d18, q15, q11
-    vqadd.u8    q8,  q0,  q8
-    vqadd.u8    d18, d2,  d18
-    /*
-     * convert the results in d16, d17, d18 to r5g6b5 and store
-     * them into {d28, d29}
-     */
-    vshll.u8    q14, d18, #8
-    vshll.u8    q10, d17, #8
-    vshll.u8    q15, d16, #8
-    vsri.u16    q14, q10, #5
-    vsri.u16    q14, q15, #11
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
-    fetch_mask_pixblock
-        vrshr.u16   q10, q6, #8
-        vrshr.u16   q14, q7, #8
-    vld1.16     {d4, d5}, [DST_R, :128]!
-        vrshr.u16   q15, q11, #8
-        vraddhn.u16 d16, q10, q6
-        vraddhn.u16 d17, q14, q7
-        vraddhn.u16 d22, q15, q11
-            /* process_pixblock_head */
-            /*
-             * 'combine_mask_ca' replacement
-             *
-             * input:  solid src (n) in {d8,  d9,  d10, d11}  [B, G, R, A]
-             *         mask in          {d24, d25, d26}       [B, G, R]
-             * output: updated src in   {d0,  d1,  d2 }       [B, G, R]
-             *         updated mask in  {d24, d25, d26}       [B, G, R]
-             */
-            vmull.u8    q6,  d26, d10
-        vqadd.u8    q8,  q0, q8
-            vmull.u8    q0,  d24, d8
-        vqadd.u8    d22, d2, d22
-            vmull.u8    q1,  d25, d9
-        /*
-         * convert the result in d16, d17, d22 to r5g6b5 and store
-         * it into {d28, d29}
-         */
-        vshll.u8    q14, d22, #8
-        vshll.u8    q10, d17, #8
-        vshll.u8    q15, d16, #8
-            vmull.u8    q9,  d11, d25
-        vsri.u16    q14, q10, #5
-            vmull.u8    q12, d11, d24
-            vmull.u8    q13, d11, d26
-        vsri.u16    q14, q15, #11
-    cache_preload 8, 8
-            vrshr.u16   q8,  q0,  #8
-            vrshr.u16   q10, q1,  #8
-            vrshr.u16   q11, q6,  #8
-            vraddhn.u16 d0,  q0,  q8
-            vraddhn.u16 d1,  q1,  q10
-            vraddhn.u16 d2,  q6,  q11
-            vrshr.u16   q11, q12, #8
-            vrshr.u16   q8,  q9,  #8
-            vrshr.u16   q6,  q13, #8
-            vraddhn.u16 d24, q12, q11
-            vraddhn.u16 d25, q9,  q8
-                /*
-                 * convert 8 r5g6b5 pixel data from {d4, d5} to planar
-	         * 8-bit format and put data into d16 - blue, d17 - green,
-	         * d18 - red
-                 */
-                vshrn.u16   d17, q2,  #3
-                vshrn.u16   d18, q2,  #8
-            vraddhn.u16 d26, q13, q6
-                vsli.u16    q2,  q2,  #5
-                vsri.u8     d17, d17, #6
-                vsri.u8     d18, d18, #5
-            /*
-             * 'combine_over_ca' replacement
-             *
-             * output: updated dest in d16 - blue, d17 - green, d18 - red
-             */
-            vmvn.8      q12, q12
-                vshrn.u16   d16, q2,  #2
-            vmvn.8      d26, d26
-            vmull.u8    q7,  d17, d25
-            vmull.u8    q6,  d16, d24
-            vmull.u8    q11, d18, d26
-    vst1.16     {d28, d29}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d11[0]}, [DUMMY]
-    vdup.8      d8, d11[0]
-    vdup.8      d9, d11[1]
-    vdup.8      d10, d11[2]
-    vdup.8      d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_0565_ca_init, \
-    pixman_composite_over_n_8888_0565_ca_cleanup, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_in_n_8_process_pixblock_head
-    /* expecting source data in {d0, d1, d2, d3} */
-    /* and destination data in {d4, d5, d6, d7} */
-    vmull.u8    q8,  d4,  d3
-    vmull.u8    q9,  d5,  d3
-    vmull.u8    q10, d6,  d3
-    vmull.u8    q11, d7,  d3
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail
-    vrshr.u16   q14, q8,  #8
-    vrshr.u16   q15, q9,  #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d28, q8,  q14
-    vraddhn.u16 d29, q9,  q15
-    vraddhn.u16 d30, q10, q12
-    vraddhn.u16 d31, q11, q13
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail_head
-    pixman_composite_in_n_8_process_pixblock_tail
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    cache_preload 32, 32
-    pixman_composite_in_n_8_process_pixblock_head
-    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_in_n_8_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d3[0]}, [DUMMY]
-    vdup.8      d3, d3[3]
-.endm
-
-.macro pixman_composite_in_n_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_in_n_8_init, \
-    pixman_composite_in_n_8_cleanup, \
-    pixman_composite_in_n_8_process_pixblock_head, \
-    pixman_composite_in_n_8_process_pixblock_tail, \
-    pixman_composite_in_n_8_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-.macro pixman_composite_add_n_8_8_process_pixblock_head
-    /* expecting source data in {d8, d9, d10, d11} */
-    /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
-    /* and destination data in {d4, d5, d6, d7} */
-    /* mask is in d24, d25, d26, d27 */
-    vmull.u8    q0, d24, d11
-    vmull.u8    q1, d25, d11
-    vmull.u8    q6, d26, d11
-    vmull.u8    q7, d27, d11
-    vrshr.u16   q10, q0, #8
-    vrshr.u16   q11, q1, #8
-    vrshr.u16   q12, q6, #8
-    vrshr.u16   q13, q7, #8
-    vraddhn.u16 d0, q0, q10
-    vraddhn.u16 d1, q1, q11
-    vraddhn.u16 d2, q6, q12
-    vraddhn.u16 d3, q7, q13
-    vqadd.u8    q14, q0, q2
-    vqadd.u8    q15, q1, q3
-.endm
-
-.macro pixman_composite_add_n_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
-    pixman_composite_add_n_8_8_process_pixblock_tail
-    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    fetch_mask_pixblock
-    cache_preload 32, 32
-    pixman_composite_add_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_n_8_8_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vpush       {d8-d15}
-    vld1.32     {d11[0]}, [DUMMY]
-    vdup.8      d11, d11[3]
-.endm
-
-.macro pixman_composite_add_n_8_8_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_n_8_8_init, \
-    pixman_composite_add_n_8_8_cleanup, \
-    pixman_composite_add_n_8_8_process_pixblock_head, \
-    pixman_composite_add_n_8_8_process_pixblock_tail, \
-    pixman_composite_add_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_8_process_pixblock_head
-    /* expecting source data in {d0, d1, d2, d3} */
-    /* destination data in {d4, d5, d6, d7} */
-    /* mask in {d24, d25, d26, d27} */
-    vmull.u8    q8, d24, d0
-    vmull.u8    q9, d25, d1
-    vmull.u8    q10, d26, d2
-    vmull.u8    q11, d27, d3
-    vrshr.u16   q0, q8, #8
-    vrshr.u16   q1, q9, #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d0, q0, q8
-    vraddhn.u16 d1, q1, q9
-    vraddhn.u16 d2, q12, q10
-    vraddhn.u16 d3, q13, q11
-    vqadd.u8    q14, q0, q2
-    vqadd.u8    q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
-    pixman_composite_add_8_8_8_process_pixblock_tail
-    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    fetch_mask_pixblock
-    fetch_src_pixblock
-    cache_preload 32, 32
-    pixman_composite_add_8_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_8_8_8_init
-.endm
-
-.macro pixman_composite_add_8_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_8_8_8_init, \
-    pixman_composite_add_8_8_8_cleanup, \
-    pixman_composite_add_8_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_8_process_pixblock_tail, \
-    pixman_composite_add_8_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
-    /* expecting source data in {d0, d1, d2, d3} */
-    /* destination data in {d4, d5, d6, d7} */
-    /* mask in {d24, d25, d26, d27} */
-    vmull.u8    q8,  d27, d0
-    vmull.u8    q9,  d27, d1
-    vmull.u8    q10, d27, d2
-    vmull.u8    q11, d27, d3
-    /* 1 cycle bubble */
-    vrsra.u16   q8,  q8,  #8
-    vrsra.u16   q9,  q9,  #8
-    vrsra.u16   q10, q10, #8
-    vrsra.u16   q11, q11, #8
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
-    /* 2 cycle bubble */
-    vrshrn.u16  d28, q8,  #8
-    vrshrn.u16  d29, q9,  #8
-    vrshrn.u16  d30, q10, #8
-    vrshrn.u16  d31, q11, #8
-    vqadd.u8    q14, q2,  q14
-    /* 1 cycle bubble */
-    vqadd.u8    q15, q3,  q15
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-        vrshrn.u16  d28, q8,  #8
-    fetch_mask_pixblock
-        vrshrn.u16  d29, q9,  #8
-    vmull.u8    q8,  d27, d0
-        vrshrn.u16  d30, q10, #8
-    vmull.u8    q9,  d27, d1
-        vrshrn.u16  d31, q11, #8
-    vmull.u8    q10, d27, d2
-        vqadd.u8    q14, q2,  q14
-    vmull.u8    q11, d27, d3
-        vqadd.u8    q15, q3,  q15
-    vrsra.u16   q8,  q8,  #8
-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-    vrsra.u16   q9,  q9,  #8
-        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
-    vrsra.u16   q10, q10, #8
-
-    cache_preload 8, 8
-
-    vrsra.u16   q11, q11, #8
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-generate_composite_function \
-    pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_n_8_8888_init
-    add         DUMMY, sp, #ARGS_STACK_OFFSET
-    vld1.32     {d3[0]}, [DUMMY]
-    vdup.8      d0, d3[0]
-    vdup.8      d1, d3[1]
-    vdup.8      d2, d3[2]
-    vdup.8      d3, d3[3]
-.endm
-
-.macro pixman_composite_add_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_n_8_8888_init, \
-    pixman_composite_add_n_8_8888_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_n_8888_init
-    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
-    vld1.32     {d27[0]}, [DUMMY]
-    vdup.8      d27, d27[3]
-.endm
-
-.macro pixman_composite_add_8888_n_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_8888_n_8888_init, \
-    pixman_composite_add_8888_n_8888_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-    /* expecting source data in {d0, d1, d2, d3} */
-    /* destination data in {d4, d5, d6, d7} */
-    /* solid mask is in d15 */
-
-    /* 'in' */
-    vmull.u8    q8, d15, d3
-    vmull.u8    q6, d15, d2
-    vmull.u8    q5, d15, d1
-    vmull.u8    q4, d15, d0
-    vrshr.u16   q13, q8, #8
-    vrshr.u16   q12, q6, #8
-    vrshr.u16   q11, q5, #8
-    vrshr.u16   q10, q4, #8
-    vraddhn.u16 d3, q8, q13
-    vraddhn.u16 d2, q6, q12
-    vraddhn.u16 d1, q5, q11
-    vraddhn.u16 d0, q4, q10
-    vmvn.8      d24, d3  /* get inverted alpha */
-    /* now do alpha blending */
-    vmull.u8    q8, d24, d4
-    vmull.u8    q9, d24, d5
-    vmull.u8    q10, d24, d6
-    vmull.u8    q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q12, q10
-    vraddhn.u16 d31, q13, q11
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
-    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_head
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    vqadd.u8    q14, q0, q14
-    vqadd.u8    q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
-    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_8888_n_8888_init
-    add         DUMMY, sp, #48
-    vpush       {d8-d15}
-    vld1.32     {d15[0]}, [DUMMY]
-    vdup.8      d15, d15[3]
-.endm
-
-.macro pixman_composite_over_8888_n_8888_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_8888_n_8888_init, \
-    pixman_composite_over_8888_n_8888_cleanup, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
-    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
-    vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
-    vst3.8 {d0, d1, d2}, [DST_W]!
-    fetch_src_pixblock
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_0888_process_pixblock_head, \
-    pixman_composite_src_0888_0888_process_pixblock_tail, \
-    pixman_composite_src_0888_0888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
-    vswp   d0, d2
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
-    vst4.8 {d0, d1, d2, d3}, [DST_W]!
-    fetch_src_pixblock
-    vswp   d0, d2
-    cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_init
-    veor   d3, d3, d3
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    pixman_composite_src_0888_8888_rev_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_head, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
-    vshll.u8    q8, d1, #8
-    vshll.u8    q9, d2, #8
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
-    vshll.u8    q14, d0, #8
-    vsri.u16    q14, q8, #5
-    vsri.u16    q14, q9, #11
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
-        vshll.u8    q14, d0, #8
-    fetch_src_pixblock
-        vsri.u16    q14, q8, #5
-        vsri.u16    q14, q9, #11
-    vshll.u8    q8, d1, #8
-        vst1.16 {d28, d29}, [DST_W, :128]!
-    vshll.u8    q9, d2, #8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_head, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
-    vmull.u8    q8, d3, d0
-    vmull.u8    q9, d3, d1
-    vmull.u8    q10, d3, d2
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
-    vrshr.u16   q11, q8, #8
-    vswp        d3, d31
-    vrshr.u16   q12, q9, #8
-    vrshr.u16   q13, q10, #8
-    vraddhn.u16 d30, q11, q8
-    vraddhn.u16 d29, q12, q9
-    vraddhn.u16 d28, q13, q10
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
-        vrshr.u16   q11, q8, #8
-        vswp        d3, d31
-        vrshr.u16   q12, q9, #8
-        vrshr.u16   q13, q10, #8
-    fetch_src_pixblock
-        vraddhn.u16 d30, q11, q8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d29, q12, q9
-        vraddhn.u16 d28, q13, q10
-    vmull.u8    q8, d3, d0
-    vmull.u8    q9, d3, d1
-    vmull.u8    q10, d3, d2
-        vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF cmp PF_X, ORIG_W
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-                                    PF subge PF_X, PF_X, ORIG_W
-                                    PF subges PF_CTL, PF_CTL, #0x10
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
-    pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_head, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head
-    vmull.u8    q8, d3, d0
-    vmull.u8    q9, d3, d1
-    vmull.u8    q10, d3, d2
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail
-    vrshr.u16   q11, q8, #8
-    vswp        d3, d31
-    vrshr.u16   q12, q9, #8
-    vrshr.u16   q13, q10, #8
-    vraddhn.u16 d28, q11, q8
-    vraddhn.u16 d29, q12, q9
-    vraddhn.u16 d30, q13, q10
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head
-        vrshr.u16   q11, q8, #8
-        vswp        d3, d31
-        vrshr.u16   q12, q9, #8
-        vrshr.u16   q13, q10, #8
-    fetch_src_pixblock
-        vraddhn.u16 d28, q11, q8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-                                    PF addne PF_X, PF_X, #8
-                                    PF subne PF_CTL, PF_CTL, #1
-        vraddhn.u16 d29, q12, q9
-        vraddhn.u16 d30, q13, q10
-    vmull.u8    q8, d3, d0
-    vmull.u8    q9, d3, d1
-    vmull.u8    q10, d3, d2
-        vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-                                    PF cmp PF_X, ORIG_W
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-                                    PF subge PF_X, PF_X, ORIG_W
-                                    PF subges PF_CTL, PF_CTL, #0x10
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
-    pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_head
-    /* mask is in d15 */
-    convert_0565_to_x888 q4, d2, d1, d0
-    convert_0565_to_x888 q5, d6, d5, d4
-    /* source pixel data is in      {d0, d1, d2, XX} */
-    /* destination pixel data is in {d4, d5, d6, XX} */
-    vmvn.8      d7,  d15
-    vmull.u8    q6,  d15, d2
-    vmull.u8    q5,  d15, d1
-    vmull.u8    q4,  d15, d0
-    vmull.u8    q8,  d7,  d4
-    vmull.u8    q9,  d7,  d5
-    vmull.u8    q13, d7,  d6
-    vrshr.u16   q12, q6,  #8
-    vrshr.u16   q11, q5,  #8
-    vrshr.u16   q10, q4,  #8
-    vraddhn.u16 d2,  q6,  q12
-    vraddhn.u16 d1,  q5,  q11
-    vraddhn.u16 d0,  q4,  q10
-.endm
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
-    vrshr.u16   q14, q8,  #8
-    vrshr.u16   q15, q9,  #8
-    vrshr.u16   q12, q13, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q12, q13
-    vqadd.u8    q0,  q0,  q14
-    vqadd.u8    q1,  q1,  q15
-    /* 32bpp result is in {d0, d1, d2, XX} */
-    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
-    fetch_mask_pixblock
-    pixman_composite_over_0565_8_0565_process_pixblock_tail
-    fetch_src_pixblock
-    vld1.16    {d10, d11}, [DST_R, :128]!
-    cache_preload 8, 8
-    pixman_composite_over_0565_8_0565_process_pixblock_head
-    vst1.16    {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_n_0565_init
-    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
-    vpush       {d8-d15}
-    vld1.32     {d15[0]}, [DUMMY]
-    vdup.8      d15, d15[3]
-.endm
-
-.macro pixman_composite_over_0565_n_0565_cleanup
-    vpop        {d8-d15}
-.endm
-
-generate_composite_function \
-    pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_0565_n_0565_init, \
-    pixman_composite_over_0565_n_0565_cleanup, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10, /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_head
-    /* mask is in d15 */
-    convert_0565_to_x888 q4, d2, d1, d0
-    convert_0565_to_x888 q5, d6, d5, d4
-    /* source pixel data is in      {d0, d1, d2, XX} */
-    /* destination pixel data is in {d4, d5, d6, XX} */
-    vmull.u8    q6,  d15, d2
-    vmull.u8    q5,  d15, d1
-    vmull.u8    q4,  d15, d0
-    vrshr.u16   q12, q6,  #8
-    vrshr.u16   q11, q5,  #8
-    vrshr.u16   q10, q4,  #8
-    vraddhn.u16 d2,  q6,  q12
-    vraddhn.u16 d1,  q5,  q11
-    vraddhn.u16 d0,  q4,  q10
-.endm
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
-    vqadd.u8    q0,  q0,  q2
-    vqadd.u8    q1,  q1,  q3
-    /* 32bpp result is in {d0, d1, d2, XX} */
-    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
-    fetch_mask_pixblock
-    pixman_composite_add_0565_8_0565_process_pixblock_tail
-    fetch_src_pixblock
-    vld1.16    {d10, d11}, [DST_R, :128]!
-    cache_preload 8, 8
-    pixman_composite_add_0565_8_0565_process_pixblock_head
-    vst1.16    {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_add_0565_8_0565_process_pixblock_head, \
-    pixman_composite_add_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10, /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
-    /* mask is in d15 */
-    convert_0565_to_x888 q5, d6, d5, d4
-    /* destination pixel data is in {d4, d5, d6, xx} */
-    vmvn.8      d24, d15 /* get inverted alpha */
-    /* now do alpha blending */
-    vmull.u8    q8, d24, d4
-    vmull.u8    q9, d24, d5
-    vmull.u8    q10, d24, d6
-.endm
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q12, q10, #8
-    vraddhn.u16 d0, q14, q8
-    vraddhn.u16 d1, q15, q9
-    vraddhn.u16 d2, q12, q10
-    /* 32bpp result is in {d0, d1, d2, XX} */
-    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
-    fetch_src_pixblock
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail
-    vld1.16    {d10, d11}, [DST_R, :128]!
-    cache_preload 8, 8
-    pixman_composite_out_reverse_8_0565_process_pixblock_head
-    vst1.16    {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_head, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10, /* dst_r_basereg */ \
-    15, /* src_basereg   */ \
-    0   /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_head
-    /* src is in d0 */
-    /* destination pixel data is in {d4, d5, d6, d7} */
-    vmvn.8      d1, d0 /* get inverted alpha */
-    /* now do alpha blending */
-    vmull.u8    q8, d1, d4
-    vmull.u8    q9, d1, d5
-    vmull.u8    q10, d1, d6
-    vmull.u8    q11, d1, d7
-.endm
-
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail
-    vrshr.u16   q14, q8, #8
-    vrshr.u16   q15, q9, #8
-    vrshr.u16   q12, q10, #8
-    vrshr.u16   q13, q11, #8
-    vraddhn.u16 d28, q14, q8
-    vraddhn.u16 d29, q15, q9
-    vraddhn.u16 d30, q12, q10
-    vraddhn.u16 d31, q13, q11
-    /* 32bpp result is in {d28, d29, d30, d31} */
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail
-    vld4.8    {d4, d5, d6, d7}, [DST_R, :128]!
-    cache_preload 8, 8
-    pixman_composite_out_reverse_8_8888_process_pixblock_head
-    vst4.8    {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
-    pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0   /* mask_basereg  */
-
-/******************************************************************************/
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_0565_process_pixblock_head, \
-    pixman_composite_over_8888_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_0565_process_pixblock_head, \
-    pixman_composite_src_8888_0565_process_pixblock_tail, \
-    pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_8888_process_pixblock_head, \
-    pixman_composite_src_0565_8888_process_pixblock_tail, \
-    pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-/*
- * Bilinear scaling support code which tries to provide pixel fetching, color
- * format conversion, and interpolation as separate macros which can be used
- * as the basic building blocks for constructing bilinear scanline functions.
- */
-
-.macro bilinear_load_8888 reg1, reg2, tmp
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    vld1.32   {reg1}, [TMP1], STRIDE
-    vld1.32   {reg2}, [TMP1]
-.endm
-
-.macro bilinear_load_0565 reg1, reg2, tmp
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    vld1.32   {reg2[0]}, [TMP1], STRIDE
-    vld1.32   {reg2[1]}, [TMP1]
-    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_8888 \
-                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
-
-    bilinear_load_8888 reg1, reg2, tmp1
-    vmull.u8  acc1, reg1, d28
-    vmlal.u8  acc1, reg2, d29
-    bilinear_load_8888 reg3, reg4, tmp2
-    vmull.u8  acc2, reg3, d28
-    vmlal.u8  acc2, reg4, d29
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_0565 \
-                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {acc2lo[0]}, [TMP1], STRIDE
-    vld1.32   {acc2hi[0]}, [TMP2], STRIDE
-    vld1.32   {acc2lo[1]}, [TMP1]
-    vld1.32   {acc2hi[1]}, [TMP2]
-    convert_0565_to_x888 acc2, reg3, reg2, reg1
-    vzip.u8   reg1, reg3
-    vzip.u8   reg2, reg4
-    vzip.u8   reg3, reg4
-    vzip.u8   reg1, reg2
-    vmull.u8  acc1, reg1, d28
-    vmlal.u8  acc1, reg2, d29
-    vmull.u8  acc2, reg3, d28
-    vmlal.u8  acc2, reg4, d29
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_0565 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {xacc2lo[0]}, [TMP1], STRIDE
-    vld1.32   {xacc2hi[0]}, [TMP2], STRIDE
-    vld1.32   {xacc2lo[1]}, [TMP1]
-    vld1.32   {xacc2hi[1]}, [TMP2]
-    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #1
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #1
-    vld1.32   {yacc2lo[0]}, [TMP1], STRIDE
-    vzip.u8   xreg1, xreg3
-    vld1.32   {yacc2hi[0]}, [TMP2], STRIDE
-    vzip.u8   xreg2, xreg4
-    vld1.32   {yacc2lo[1]}, [TMP1]
-    vzip.u8   xreg3, xreg4
-    vld1.32   {yacc2hi[1]}, [TMP2]
-    vzip.u8   xreg1, xreg2
-    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
-    vmull.u8  xacc1, xreg1, d28
-    vzip.u8   yreg1, yreg3
-    vmlal.u8  xacc1, xreg2, d29
-    vzip.u8   yreg2, yreg4
-    vmull.u8  xacc2, xreg3, d28
-    vzip.u8   yreg3, yreg4
-    vmlal.u8  xacc2, xreg4, d29
-    vzip.u8   yreg1, yreg2
-    vmull.u8  yacc1, yreg1, d28
-    vmlal.u8  yacc1, yreg2, d29
-    vmull.u8  yacc2, yreg3, d28
-    vmlal.u8  yacc2, yreg4, d29
-.endm
-
-.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
-    vst1.32   {d0, d1}, [OUT, :128]!
-.elseif numpix == 2
-    vst1.32   {d0}, [OUT, :64]!
-.elseif numpix == 1
-    vst1.32   {d0[0]}, [OUT, :32]!
-.else
-    .error bilinear_store_8888 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_store_0565 numpix, tmp1, tmp2
-    vuzp.u8 d0, d1
-    vuzp.u8 d2, d3
-    vuzp.u8 d1, d3
-    vuzp.u8 d0, d2
-    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
-.if numpix == 4
-    vst1.16   {d2}, [OUT, :64]!
-.elseif numpix == 2
-    vst1.32   {d2[0]}, [OUT, :32]!
-.elseif numpix == 1
-    vst1.16   {d2[0]}, [OUT, :16]!
-.else
-    .error bilinear_store_0565 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
-    bilinear_load_&src_fmt d0, d1, d2
-    vmull.u8  q1, d0, d28
-    vmlal.u8  q1, d1, d29
-    /* 5 cycles bubble */
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    /* 5 cycles bubble */
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    /* 3 cycles bubble */
-    vmovn.u16 d0, q0
-    /* 1 cycle bubble */
-    bilinear_store_&dst_fmt 1, q2, q3
-.endm
-
-.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
-    bilinear_load_and_vertical_interpolate_two_&src_fmt \
-                q1, q11, d0, d1, d20, d21, d22, d23
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q10, d22, d31
-    vmlal.u16 q10, d23, d31
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-    vmovn.u16 d0, q0
-    bilinear_store_&dst_fmt 2, q2, q3
-.endm
-
-.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
-    bilinear_load_and_vertical_interpolate_four_&src_fmt \
-                q1, q11, d0, d1, d20, d21, d22, d23 \
-                q3, q9,  d4, d5, d16, d17, d18, d19
-    pld       [TMP1, PF_OFFS]
-    sub       TMP1, TMP1, STRIDE
-    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d2, d30
-    vmlal.u16 q0, d3, d30
-    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q10, d22, d31
-    vmlal.u16 q10, d23, d31
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q2, d6, d30
-    vmlal.u16 q2, d7, d30
-    vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS
-    pld       [TMP2, PF_OFFS]
-    vmlsl.u16 q8, d18, d31
-    vmlal.u16 q8, d19, d31
-    vadd.u16  q12, q12, q13
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16 d0, q0
-    vmovn.u16 d1, q2
-    vadd.u16  q12, q12, q13
-    bilinear_store_&dst_fmt 4, q2, q3
-.endm
-
-.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
-.else
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
-.endif
-.endm
-
-.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
-.else
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
-.else
-    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
-.else
-    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
-.else
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.endif
-.endm
-
-.set BILINEAR_FLAG_UNROLL_4,          0
-.set BILINEAR_FLAG_UNROLL_8,          1
-.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2
-
-/*
- * Main template macro for generating NEON optimized bilinear scanline
- * functions.
- *
- * Bilinear scanline scaler macro template uses the following arguments:
- *  fname             - name of the function to generate
- *  src_fmt           - source color format (8888 or 0565)
- *  dst_fmt           - destination color format (8888 or 0565)
- *  bpp_shift         - (1 << bpp_shift) is the size of source pixel in bytes
- *  prefetch_distance - prefetch in the source image by that many
- *                      pixels ahead
- */
-
-.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
-                                       src_bpp_shift, dst_bpp_shift, \
-                                       prefetch_distance, flags
-
-pixman_asm_function fname
-    OUT       .req      r0
-    TOP       .req      r1
-    BOTTOM    .req      r2
-    WT        .req      r3
-    WB        .req      r4
-    X         .req      r5
-    UX        .req      r6
-    WIDTH     .req      ip
-    TMP1      .req      r3
-    TMP2      .req      r4
-    PF_OFFS   .req      r7
-    TMP3      .req      r8
-    TMP4      .req      r9
-    STRIDE    .req      r2
-
-    mov       ip, sp
-    push      {r4, r5, r6, r7, r8, r9}
-    mov       PF_OFFS, #prefetch_distance
-    ldmia     ip, {WB, X, UX, WIDTH}
-    mul       PF_OFFS, PF_OFFS, UX
-
-.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-    vpush     {d8-d15}
-.endif
-
-    sub       STRIDE, BOTTOM, TOP
-    .unreq    BOTTOM
-
-    cmp       WIDTH, #0
-    ble       3f
-
-    vdup.u16  q12, X
-    vdup.u16  q13, UX
-    vdup.u8   d28, WT
-    vdup.u8   d29, WB
-    vadd.u16  d25, d25, d26
-
-    /* ensure good destination alignment  */
-    cmp       WIDTH, #1
-    blt       0f
-    tst       OUT, #(1 << dst_bpp_shift)
-    beq       0f
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-    bilinear_interpolate_last_pixel src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #1
-0:
-    vadd.u16  q13, q13, q13
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vadd.u16  q12, q12, q13
-
-    cmp       WIDTH, #2
-    blt       0f
-    tst       OUT, #(1 << (dst_bpp_shift + 1))
-    beq       0f
-    bilinear_interpolate_two_pixels src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #2
-0:
-.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
-/*********** 8 pixels per iteration *****************/
-    cmp       WIDTH, #4
-    blt       0f
-    tst       OUT, #(1 << (dst_bpp_shift + 2))
-    beq       0f
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #4
-0:
-    subs      WIDTH, WIDTH, #8
-    blt       1f
-    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
-    bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #8
-    blt       5f
-0:
-    bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #8
-    bge       0b
-5:
-    bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
-1:
-    tst       WIDTH, #4
-    beq       2f
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-2:
-.else
-/*********** 4 pixels per iteration *****************/
-    subs      WIDTH, WIDTH, #4
-    blt       1f
-    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
-    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #4
-    blt       5f
-0:
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #4
-    bge       0b
-5:
-    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-1:
-/****************************************************/
-.endif
-    /* handle the remaining trailing pixels */
-    tst       WIDTH, #2
-    beq       2f
-    bilinear_interpolate_two_pixels src_fmt, dst_fmt
-2:
-    tst       WIDTH, #1
-    beq       3f
-    bilinear_interpolate_last_pixel src_fmt, dst_fmt
-3:
-.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-    vpop      {d8-d15}
-.endif
-    pop       {r4, r5, r6, r7, r8, r9}
-    bx        lr
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    TMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.endfunc
-
-.endm
-
-/*****************************************************************************/
-
-.set have_bilinear_interpolate_four_pixels_8888_8888, 1
-
-.macro bilinear_interpolate_four_pixels_8888_8888_head
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-
-    vld1.32   {d22}, [TMP1], STRIDE
-    vld1.32   {d23}, [TMP1]
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    vmull.u8  q8, d22, d28
-    vmlal.u8  q8, d23, d29
-
-    vld1.32   {d22}, [TMP2], STRIDE
-    vld1.32   {d23}, [TMP2]
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmull.u8  q9, d22, d28
-    vmlal.u8  q9, d23, d29
-
-    vld1.32   {d22}, [TMP3], STRIDE
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q1, d18, d31
-.endm
-
-.macro bilinear_interpolate_four_pixels_8888_8888_tail
-    vmlal.u16 q1, d19, d31
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q2, d20, d30
-    vmlal.u16 q2, d21, d30
-    vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q3, d22, d31
-    vmlal.u16 q3, d23, d31
-    vadd.u16  q12, q12, q13
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16 d6, q0
-    vmovn.u16 d7, q2
-    vadd.u16  q12, q12, q13
-    vst1.32   {d6, d7}, [OUT, :128]!
-.endm
-
-.macro bilinear_interpolate_four_pixels_8888_8888_tail_head
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-        vmlal.u16 q1, d19, d31
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-        vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-        vmlsl.u16 q2, d20, d30
-        vmlal.u16 q2, d21, d30
-        vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vld1.32   {d20}, [TMP1], STRIDE
-        vmlsl.u16 q3, d22, d31
-        vmlal.u16 q3, d23, d31
-    vld1.32   {d21}, [TMP1]
-    vmull.u8  q8, d20, d28
-    vmlal.u8  q8, d21, d29
-        vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d22}, [TMP2], STRIDE
-        vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vadd.u16  q12, q12, q13
-    vld1.32   {d23}, [TMP2]
-    vmull.u8  q9, d22, d28
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmlal.u8  q9, d23, d29
-    vld1.32   {d22}, [TMP3], STRIDE
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-        vmovn.u16 d6, q0
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-        vmovn.u16 d7, q2
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-        vadd.u16  q12, q12, q13
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-        vst1.32   {d6, d7}, [OUT, :128]!
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q1, d18, d31
-.endm
-
-/*****************************************************************************/
-
-.set have_bilinear_interpolate_eight_pixels_8888_0565, 1
-
-.macro bilinear_interpolate_eight_pixels_8888_0565_head
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-    vld1.32   {d20}, [TMP1], STRIDE
-    vld1.32   {d21}, [TMP1]
-    vmull.u8  q8, d20, d28
-    vmlal.u8  q8, d21, d29
-    vld1.32   {d22}, [TMP2], STRIDE
-    vld1.32   {d23}, [TMP2]
-    vmull.u8  q9, d22, d28
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmlal.u8  q9, d23, d29
-    vld1.32   {d22}, [TMP3], STRIDE
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q1, d18, d31
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-        vmlal.u16 q1, d19, d31
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-        vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-        vmlsl.u16 q2, d20, d30
-        vmlal.u16 q2, d21, d30
-        vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vld1.32   {d20}, [TMP1], STRIDE
-        vmlsl.u16 q3, d22, d31
-        vmlal.u16 q3, d23, d31
-    vld1.32   {d21}, [TMP1]
-    vmull.u8  q8, d20, d28
-    vmlal.u8  q8, d21, d29
-        vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d22}, [TMP2], STRIDE
-        vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vadd.u16  q12, q12, q13
-    vld1.32   {d23}, [TMP2]
-    vmull.u8  q9, d22, d28
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmlal.u8  q9, d23, d29
-    vld1.32   {d22}, [TMP3], STRIDE
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-        vmovn.u16 d8, q0
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-        vmovn.u16 d9, q2
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-        vadd.u16  q12, q12, q13
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q1, d18, d31
-.endm
-
-.macro bilinear_interpolate_eight_pixels_8888_0565_tail
-    vmlal.u16 q1, d19, d31
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q2, d20, d30
-    vmlal.u16 q2, d21, d30
-    vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q3, d22, d31
-    vmlal.u16 q3, d23, d31
-    vadd.u16  q12, q12, q13
-    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vmovn.u16 d10, q0
-    vmovn.u16 d11, q2
-    vadd.u16  q12, q12, q13
-
-    vuzp.u8   d8, d9
-    vuzp.u8   d10, d11
-    vuzp.u8   d9, d11
-    vuzp.u8   d8, d10
-    vshll.u8  q6, d9, #8
-    vshll.u8  q5, d10, #8
-    vshll.u8  q7, d8, #8
-    vsri.u16  q5, q6, #5
-    vsri.u16  q5, q7, #11
-    vst1.32   {d10, d11}, [OUT, :128]!
-.endm
-
-.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-        vmlal.u16 q1, d19, d31
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-            vuzp.u8 d8, d9
-        vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-        vmlsl.u16 q2, d20, d30
-        vmlal.u16 q2, d21, d30
-        vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vld1.32   {d20}, [TMP1], STRIDE
-        vmlsl.u16 q3, d22, d31
-        vmlal.u16 q3, d23, d31
-    vld1.32   {d21}, [TMP1]
-    vmull.u8  q8, d20, d28
-    vmlal.u8  q8, d21, d29
-        vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d22}, [TMP2], STRIDE
-        vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vadd.u16  q12, q12, q13
-    vld1.32   {d23}, [TMP2]
-    vmull.u8  q9, d22, d28
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmlal.u8  q9, d23, d29
-    vld1.32   {d22}, [TMP3], STRIDE
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-        vmovn.u16 d10, q0
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-        vmovn.u16 d11, q2
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-        vadd.u16  q12, q12, q13
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-            vuzp.u8 d10, d11
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-    vmlsl.u16 q1, d18, d31
-
-    mov       TMP1, X, asr #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, asl #2
-    mov       TMP2, X, asr #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, asl #2
-        vmlal.u16 q1, d19, d31
-            vuzp.u8 d9, d11
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-        vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
-            vuzp.u8 d8, d10
-        vmlsl.u16 q2, d20, d30
-        vmlal.u16 q2, d21, d30
-        vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
-    vld1.32   {d20}, [TMP1], STRIDE
-        vmlsl.u16 q3, d22, d31
-        vmlal.u16 q3, d23, d31
-    vld1.32   {d21}, [TMP1]
-    vmull.u8  q8, d20, d28
-    vmlal.u8  q8, d21, d29
-            vshll.u8  q6, d9, #8
-            vshll.u8  q5, d10, #8
-            vshll.u8  q7, d8, #8
-        vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
-            vsri.u16  q5, q6, #5
-        vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
-            vsri.u16  q5, q7, #11
-        vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d22}, [TMP2], STRIDE
-        vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
-        vadd.u16  q12, q12, q13
-    vld1.32   {d23}, [TMP2]
-    vmull.u8  q9, d22, d28
-    mov       TMP3, X, asr #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, asl #2
-    mov       TMP4, X, asr #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, asl #2
-    vmlal.u8  q9, d23, d29
-    vld1.32   {d22}, [TMP3], STRIDE
-        vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
-    vld1.32   {d23}, [TMP3]
-    vmull.u8  q10, d22, d28
-    vmlal.u8  q10, d23, d29
-        vmovn.u16 d8, q0
-    vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS
-        vmovn.u16 d9, q2
-    vmlsl.u16 q0, d16, d30
-    vmlal.u16 q0, d17, d30
-    pld       [TMP4, PF_OFFS]
-    vld1.32   {d16}, [TMP4], STRIDE
-        vadd.u16  q12, q12, q13
-    vld1.32   {d17}, [TMP4]
-    pld       [TMP4, PF_OFFS]
-    vmull.u8  q11, d16, d28
-    vmlal.u8  q11, d17, d29
-    vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
-            vst1.32   {d10, d11}, [OUT, :128]!
-    vmlsl.u16 q1, d18, d31
-.endm
-/*****************************************************************************/
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
-    2, 2, 28, BILINEAR_FLAG_UNROLL_4
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
-    2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
-    1, 2, 28, BILINEAR_FLAG_UNROLL_4
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \
-    1, 1, 28, BILINEAR_FLAG_UNROLL_4
diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm.h b/vendor/pixman/pixman/pixman-arm-neon-asm.h
deleted file mode 100644
index bdcf6a9d4..000000000
--- a/vendor/pixman/pixman/pixman-arm-neon-asm.h
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains a macro ('generate_composite_function') which can
- * construct 2D image processing functions, based on a common template.
- * Any combinations of source, destination and mask images with 8bpp,
- * 16bpp, 24bpp, 32bpp color formats are supported.
- *
- * This macro takes care of:
- *  - handling of leading and trailing unaligned pixels
- *  - doing most of the work related to L2 cache preload
- *  - encourages the use of software pipelining for better instructions
- *    scheduling
- *
- * The user of this macro has to provide some configuration parameters
- * (bit depths for the images, prefetch distance, etc.) and a set of
- * macros, which should implement basic code chunks responsible for
- * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
- * examples.
- *
- * TODO:
- *  - try overlapped pixel method (from Ian Rickards) when processing
- *    exactly two blocks of pixels
- *  - maybe add an option to do reverse scanline processing
- */
-
-/*
- * Bit flags for 'generate_composite_function' macro which are used
- * to tune generated functions behavior.
- */
-.set FLAG_DST_WRITEONLY,       0
-.set FLAG_DST_READWRITE,       1
-.set FLAG_DEINTERLEAVE_32BPP,  2
-
-/*
- * Offset in stack where mask and source pointer/stride can be accessed
- * from 'init' macro. This is useful for doing special handling for solid mask.
- */
-.set ARGS_STACK_OFFSET,        40
-
-/*
- * Constants for selecting preferable prefetch type.
- */
-.set PREFETCH_TYPE_NONE,       0 /* No prefetch at all */
-.set PREFETCH_TYPE_SIMPLE,     1 /* A simple, fixed-distance-ahead prefetch */
-.set PREFETCH_TYPE_ADVANCED,   2 /* Advanced fine-grained prefetch */
-
-/*
- * Definitions of supplementary pixld/pixst macros (for partial load/store of
- * pixel data).
- */
-
-.macro pixldst1 op, elem_size, reg1, mem_operand, abits
-.if abits > 0
-    op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
-.else
-    op&.&elem_size {d&reg1}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
-.if abits > 0
-    op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]!
-.else
-    op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
-.if abits > 0
-    op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]!
-.else
-    op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
-    op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
-.endm
-
-.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
-    op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]!
-.endm
-
-.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
-    op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]!
-.endm
-
-.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
-.if numbytes == 32
-    pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
-                              %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif numbytes == 16
-    pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
-.elseif numbytes == 8
-    pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
-.elseif numbytes == 4
-    .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
-        pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
-    .elseif elem_size == 16
-        pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
-        pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
-    .else
-        pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
-        pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
-        pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
-        pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
-    .endif
-.elseif numbytes == 2
-    .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
-        pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
-    .else
-        pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
-        pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
-    .endif
-.elseif numbytes == 1
-    pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
-.else
-    .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
-                      %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
-    pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
-    pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.else
-    pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixst numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
-                      %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
-    pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
-    pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.else
-    pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixld_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
-    pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
-    pixld numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-.macro pixst_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
-    pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
-    pixst numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-/*
- * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register
- * aliases to be defined)
- */
-.macro pixld1_s elem_size, reg1, mem_operand
-.if elem_size == 16
-    mov     TMP1, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP1, mem_operand, TMP1, asl #1
-    mov     TMP2, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP2, mem_operand, TMP2, asl #1
-    vld1.16 {d&reg1&[0]}, [TMP1, :16]
-    mov     TMP1, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP1, mem_operand, TMP1, asl #1
-    vld1.16 {d&reg1&[1]}, [TMP2, :16]
-    mov     TMP2, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP2, mem_operand, TMP2, asl #1
-    vld1.16 {d&reg1&[2]}, [TMP1, :16]
-    vld1.16 {d&reg1&[3]}, [TMP2, :16]
-.elseif elem_size == 32
-    mov     TMP1, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP1, mem_operand, TMP1, asl #2
-    mov     TMP2, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP2, mem_operand, TMP2, asl #2
-    vld1.32 {d&reg1&[0]}, [TMP1, :32]
-    vld1.32 {d&reg1&[1]}, [TMP2, :32]
-.else
-    .error "unsupported"
-.endif
-.endm
-
-.macro pixld2_s elem_size, reg1, reg2, mem_operand
-.if 0 /* elem_size == 32 */
-    mov     TMP1, VX, asr #16
-    add     VX, VX, UNIT_X, asl #1
-    add     TMP1, mem_operand, TMP1, asl #2
-    mov     TMP2, VX, asr #16
-    sub     VX, VX, UNIT_X
-    add     TMP2, mem_operand, TMP2, asl #2
-    vld1.32 {d&reg1&[0]}, [TMP1, :32]
-    mov     TMP1, VX, asr #16
-    add     VX, VX, UNIT_X, asl #1
-    add     TMP1, mem_operand, TMP1, asl #2
-    vld1.32 {d&reg2&[0]}, [TMP2, :32]
-    mov     TMP2, VX, asr #16
-    add     VX, VX, UNIT_X
-    add     TMP2, mem_operand, TMP2, asl #2
-    vld1.32 {d&reg1&[1]}, [TMP1, :32]
-    vld1.32 {d&reg2&[1]}, [TMP2, :32]
-.else
-    pixld1_s elem_size, reg1, mem_operand
-    pixld1_s elem_size, reg2, mem_operand
-.endif
-.endm
-
-.macro pixld0_s elem_size, reg1, idx, mem_operand
-.if elem_size == 16
-    mov     TMP1, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP1, mem_operand, TMP1, asl #1
-    vld1.16 {d&reg1&[idx]}, [TMP1, :16]
-.elseif elem_size == 32
-    mov     TMP1, VX, asr #16
-    adds    VX, VX, UNIT_X
-5:  subpls  VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-    add     TMP1, mem_operand, TMP1, asl #2
-    vld1.32 {d&reg1&[idx]}, [TMP1, :32]
-.endif
-.endm
-
-.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand
-.if numbytes == 32
-    pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
-    pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
-    pixdeinterleave elem_size, %(basereg+4)
-.elseif numbytes == 16
-    pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
-.elseif numbytes == 8
-    pixld1_s elem_size, %(basereg+1), mem_operand
-.elseif numbytes == 4
-    .if elem_size == 32
-        pixld0_s elem_size, %(basereg+0), 1, mem_operand
-    .elseif elem_size == 16
-        pixld0_s elem_size, %(basereg+0), 2, mem_operand
-        pixld0_s elem_size, %(basereg+0), 3, mem_operand
-    .else
-        pixld0_s elem_size, %(basereg+0), 4, mem_operand
-        pixld0_s elem_size, %(basereg+0), 5, mem_operand
-        pixld0_s elem_size, %(basereg+0), 6, mem_operand
-        pixld0_s elem_size, %(basereg+0), 7, mem_operand
-    .endif
-.elseif numbytes == 2
-    .if elem_size == 16
-        pixld0_s elem_size, %(basereg+0), 1, mem_operand
-    .else
-        pixld0_s elem_size, %(basereg+0), 2, mem_operand
-        pixld0_s elem_size, %(basereg+0), 3, mem_operand
-    .endif
-.elseif numbytes == 1
-    pixld0_s elem_size, %(basereg+0), 1, mem_operand
-.else
-    .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld_s numpix, bpp, basereg, mem_operand
-.if bpp > 0
-    pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand
-.endif
-.endm
-
-.macro vuzp8 reg1, reg2
-    vuzp.8 d&reg1, d&reg2
-.endm
-
-.macro vzip8 reg1, reg2
-    vzip.8 d&reg1, d&reg2
-.endm
-
-/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixdeinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    vuzp8 %(basereg+0), %(basereg+1)
-    vuzp8 %(basereg+2), %(basereg+3)
-    vuzp8 %(basereg+1), %(basereg+3)
-    vuzp8 %(basereg+0), %(basereg+2)
-.endif
-.endm
-
-/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    vzip8 %(basereg+0), %(basereg+2)
-    vzip8 %(basereg+1), %(basereg+3)
-    vzip8 %(basereg+2), %(basereg+3)
-    vzip8 %(basereg+0), %(basereg+1)
-.endif
-.endm
-
-/*
- * This is a macro for implementing cache preload. The main idea is that
- * cache preload logic is mostly independent from the rest of pixels
- * processing code. It starts at the top left pixel and moves forward
- * across pixels and can jump across scanlines. Prefetch distance is
- * handled in an 'incremental' way: it starts from 0 and advances to the
- * optimal distance over time. After reaching optimal prefetch distance,
- * it is kept constant. There are some checks which prevent prefetching
- * unneeded pixel lines below the image (but it still can prefetch a bit
- * more data on the right side of the image - not a big issue and may
- * be actually helpful when rendering text glyphs). Additional trick is
- * the use of LDR instruction for prefetch instead of PLD when moving to
- * the next line, the point is that we have a high chance of getting TLB
- * miss in this case, and PLD would be useless.
- *
- * This sounds like it may introduce a noticeable overhead (when working with
- * fully cached data). But in reality, due to having a separate pipeline and
- * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
- * execute simultaneously with NEON and be completely shadowed by it. Thus
- * we get no performance overhead at all (*). This looks like a very nice
- * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in software
- * for almost zero cost!
- *
- * (*) The overhead of the prefetcher is visible when running some trivial
- * pixels processing like simple copy. Anyway, having prefetch is a must
- * when working with the graphics data.
- */
-.macro PF a, x:vararg
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
-    a x
-.endif
-.endm
-
-.macro cache_preload std_increment, boost_increment
-.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
-.if regs_shortage
-    PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
-.endif
-.if std_increment != 0
-    PF add PF_X, PF_X, #std_increment
-.endif
-    PF tst PF_CTL, #0xF
-    PF addne PF_X, PF_X, #boost_increment
-    PF subne PF_CTL, PF_CTL, #1
-    PF cmp PF_X, ORIG_W
-.if src_bpp_shift >= 0
-    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-.endif
-.if dst_r_bpp != 0
-    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-.endif
-.if mask_bpp_shift >= 0
-    PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
-.endif
-    PF subge PF_X, PF_X, ORIG_W
-    PF subges PF_CTL, PF_CTL, #0x10
-.if src_bpp_shift >= 0
-    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endif
-.if dst_r_bpp != 0
-    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-.endif
-.if mask_bpp_shift >= 0
-    PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-.endif
-.endif
-.endm
-
-.macro cache_preload_simple
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
-.if src_bpp > 0
-    pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
-.endif
-.if dst_r_bpp > 0
-    pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
-.endif
-.if mask_bpp > 0
-    pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
-.endif
-.endif
-.endm
-
-.macro fetch_mask_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-.endm
-
-/*
- * Macro which is used to process leading pixels until destination
- * pointer is properly aligned (at 16 bytes boundary). When destination
- * buffer uses 16bpp format, this is unnecessary, or even pointless.
- */
-.macro ensure_destination_ptr_alignment process_pixblock_head, \
-                                        process_pixblock_tail, \
-                                        process_pixblock_tail_head
-.if dst_w_bpp != 24
-    tst         DST_R, #0xF
-    beq         2f
-
-.irp lowbit, 1, 2, 4, 8, 16
-local skip1
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
-    tst         DST_R, #lowbit
-    beq         1f
-.endif
-    pixld_src   (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
-    pixld       (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
-.if dst_r_bpp > 0
-    pixld_a     (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
-.else
-    add         DST_R, DST_R, #lowbit
-.endif
-    PF add      PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
-    sub         W, W, #(lowbit * 8 / dst_w_bpp)
-1:
-.endif
-.endr
-    pixdeinterleave src_bpp, src_basereg
-    pixdeinterleave mask_bpp, mask_basereg
-    pixdeinterleave dst_r_bpp, dst_r_basereg
-
-    process_pixblock_head
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-    process_pixblock_tail
-
-    pixinterleave dst_w_bpp, dst_w_basereg
-.irp lowbit, 1, 2, 4, 8, 16
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
-    tst         DST_W, #lowbit
-    beq         1f
-.endif
-    pixst_a     (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
-1:
-.endif
-.endr
-.endif
-2:
-.endm
-
-/*
- * Special code for processing up to (pixblock_size - 1) remaining
- * trailing pixels. As SIMD processing performs operation on
- * pixblock_size pixels, anything smaller than this has to be loaded
- * and stored in a special way. Loading and storing of pixel data is
- * performed in such a way that we fill some 'slots' in the NEON
- * registers (some slots naturally are unused), then perform compositing
- * operation as usual. In the end, the data is taken from these 'slots'
- * and saved to memory.
- *
- * cache_preload_flag - allows to suppress prefetch if
- *                      set to 0
- * dst_aligned_flag   - selects whether destination buffer
- *                      is aligned
- */
-.macro process_trailing_pixels cache_preload_flag, \
-                               dst_aligned_flag, \
-                               process_pixblock_head, \
-                               process_pixblock_tail, \
-                               process_pixblock_tail_head
-    tst         W, #(pixblock_size - 1)
-    beq         2f
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
-    tst         W, #chunk_size
-    beq         1f
-    pixld_src   chunk_size, src_bpp, src_basereg, SRC
-    pixld       chunk_size, mask_bpp, mask_basereg, MASK
-.if dst_aligned_flag != 0
-    pixld_a     chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.else
-    pixld       chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.endif
-.if cache_preload_flag != 0
-    PF add      PF_X, PF_X, #chunk_size
-.endif
-1:
-.endif
-.endr
-    pixdeinterleave src_bpp, src_basereg
-    pixdeinterleave mask_bpp, mask_basereg
-    pixdeinterleave dst_r_bpp, dst_r_basereg
-
-    process_pixblock_head
-.if cache_preload_flag != 0
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-.endif
-    process_pixblock_tail
-    pixinterleave dst_w_bpp, dst_w_basereg
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
-    tst         W, #chunk_size
-    beq         1f
-.if dst_aligned_flag != 0
-    pixst_a     chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.else
-    pixst       chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.endif
-1:
-.endif
-.endr
-2:
-.endm
-
-/*
- * Macro, which performs all the needed operations to switch to the next
- * scanline and start the next loop iteration unless all the scanlines
- * are already processed.
- */
-.macro advance_to_next_scanline start_of_loop_label
-.if regs_shortage
-    ldrd        W, [sp] /* load W and H (width and height) from stack */
-.else
-    mov         W, ORIG_W
-.endif
-    add         DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
-.if src_bpp != 0
-    add         SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift
-.endif
-.if mask_bpp != 0
-    add         MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
-.endif
-.if (dst_w_bpp != 24)
-    sub         DST_W, DST_W, W, lsl #dst_bpp_shift
-.endif
-.if (src_bpp != 24) && (src_bpp != 0)
-    sub         SRC, SRC, W, lsl #src_bpp_shift
-.endif
-.if (mask_bpp != 24) && (mask_bpp != 0)
-    sub         MASK, MASK, W, lsl #mask_bpp_shift
-.endif
-    subs        H, H, #1
-    mov         DST_R, DST_W
-.if regs_shortage
-    str         H, [sp, #4] /* save updated height to stack */
-.endif
-    bge         start_of_loop_label
-.endm
-
-/*
- * Registers are allocated in the following way by default:
- * d0, d1, d2, d3     - reserved for loading source pixel data
- * d4, d5, d6, d7     - reserved for loading destination pixel data
- * d24, d25, d26, d27 - reserved for loading mask pixel data
- * d28, d29, d30, d31 - final destination pixel data for writeback to memory
- */
-.macro generate_composite_function fname, \
-                                   src_bpp_, \
-                                   mask_bpp_, \
-                                   dst_w_bpp_, \
-                                   flags, \
-                                   pixblock_size_, \
-                                   prefetch_distance, \
-                                   init, \
-                                   cleanup, \
-                                   process_pixblock_head, \
-                                   process_pixblock_tail, \
-                                   process_pixblock_tail_head, \
-                                   dst_w_basereg_ = 28, \
-                                   dst_r_basereg_ = 4, \
-                                   src_basereg_   = 0, \
-                                   mask_basereg_  = 24
-
-    pixman_asm_function fname
-
-    push        {r4-r12, lr}        /* save all registers */
-
-/*
- * Select prefetch type for this function. If prefetch distance is
- * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
- * has to be used instead of ADVANCED.
- */
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
-.if prefetch_distance == 0
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
-        ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
-.endif
-
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
-    .set src_bpp, src_bpp_
-    .set mask_bpp, mask_bpp_
-    .set dst_w_bpp, dst_w_bpp_
-    .set pixblock_size, pixblock_size_
-    .set dst_w_basereg, dst_w_basereg_
-    .set dst_r_basereg, dst_r_basereg_
-    .set src_basereg, src_basereg_
-    .set mask_basereg, mask_basereg_
-
-    .macro pixld_src x:vararg
-        pixld x
-    .endm
-    .macro fetch_src_pixblock
-        pixld_src   pixblock_size, src_bpp, \
-                    (src_basereg - pixblock_size * src_bpp / 64), SRC
-    .endm
-/*
- * Assign symbolic names to registers
- */
-    W           .req        r0      /* width (is updated during processing) */
-    H           .req        r1      /* height (is updated during processing) */
-    DST_W       .req        r2      /* destination buffer pointer for writes */
-    DST_STRIDE  .req        r3      /* destination image stride */
-    SRC         .req        r4      /* source buffer pointer */
-    SRC_STRIDE  .req        r5      /* source image stride */
-    DST_R       .req        r6      /* destination buffer pointer for reads */
-
-    MASK        .req        r7      /* mask pointer */
-    MASK_STRIDE .req        r8      /* mask stride */
-
-    PF_CTL      .req        r9      /* combined lines counter and prefetch */
-                                    /* distance increment counter */
-    PF_X        .req        r10     /* pixel index in a scanline for current */
-                                    /* pretetch position */
-    PF_SRC      .req        r11     /* pointer to source scanline start */
-                                    /* for prefetch purposes */
-    PF_DST      .req        r12     /* pointer to destination scanline start */
-                                    /* for prefetch purposes */
-    PF_MASK     .req        r14     /* pointer to mask scanline start */
-                                    /* for prefetch purposes */
-/*
- * Check whether we have enough registers for all the local variables.
- * If we don't have enough registers, original width and height are
- * kept on top of stack (and 'regs_shortage' variable is set to indicate
- * this for the rest of code). Even if there are enough registers, the
- * allocation scheme may be a bit different depending on whether source
- * or mask is not used.
- */
-.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED)
-    ORIG_W      .req        r10     /* saved original width */
-    DUMMY       .req        r12     /* temporary register */
-    .set        regs_shortage, 0
-.elseif mask_bpp == 0
-    ORIG_W      .req        r7      /* saved original width */
-    DUMMY       .req        r8      /* temporary register */
-    .set        regs_shortage, 0
-.elseif src_bpp == 0
-    ORIG_W      .req        r4      /* saved original width */
-    DUMMY       .req        r5      /* temporary register */
-    .set        regs_shortage, 0
-.else
-    ORIG_W      .req        r1      /* saved original width */
-    DUMMY       .req        r1      /* temporary register */
-    .set        regs_shortage, 1
-.endif
-
-    .set mask_bpp_shift, -1
-.if src_bpp == 32
-    .set src_bpp_shift, 2
-.elseif src_bpp == 24
-    .set src_bpp_shift, 0
-.elseif src_bpp == 16
-    .set src_bpp_shift, 1
-.elseif src_bpp == 8
-    .set src_bpp_shift, 0
-.elseif src_bpp == 0
-    .set src_bpp_shift, -1
-.else
-    .error "requested src bpp (src_bpp) is not supported"
-.endif
-.if mask_bpp == 32
-    .set mask_bpp_shift, 2
-.elseif mask_bpp == 24
-    .set mask_bpp_shift, 0
-.elseif mask_bpp == 8
-    .set mask_bpp_shift, 0
-.elseif mask_bpp == 0
-    .set mask_bpp_shift, -1
-.else
-    .error "requested mask bpp (mask_bpp) is not supported"
-.endif
-.if dst_w_bpp == 32
-    .set dst_bpp_shift, 2
-.elseif dst_w_bpp == 24
-    .set dst_bpp_shift, 0
-.elseif dst_w_bpp == 16
-    .set dst_bpp_shift, 1
-.elseif dst_w_bpp == 8
-    .set dst_bpp_shift, 0
-.else
-    .error "requested dst bpp (dst_w_bpp) is not supported"
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
-    .set dst_r_bpp, dst_w_bpp
-.else
-    .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
-    .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
-    .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
-.if prefetch_distance < 0 || prefetch_distance > 15
-    .error "invalid prefetch distance (prefetch_distance)"
-.endif
-
-.if src_bpp > 0
-    ldr         SRC, [sp, #40]
-.endif
-.if mask_bpp > 0
-    ldr         MASK, [sp, #48]
-.endif
-    PF mov      PF_X, #0
-.if src_bpp > 0
-    ldr         SRC_STRIDE, [sp, #44]
-.endif
-.if mask_bpp > 0
-    ldr         MASK_STRIDE, [sp, #52]
-.endif
-    mov         DST_R, DST_W
-
-.if src_bpp == 24
-    sub         SRC_STRIDE, SRC_STRIDE, W
-    sub         SRC_STRIDE, SRC_STRIDE, W, lsl #1
-.endif
-.if mask_bpp == 24
-    sub         MASK_STRIDE, MASK_STRIDE, W
-    sub         MASK_STRIDE, MASK_STRIDE, W, lsl #1
-.endif
-.if dst_w_bpp == 24
-    sub         DST_STRIDE, DST_STRIDE, W
-    sub         DST_STRIDE, DST_STRIDE, W, lsl #1
-.endif
-
-/*
- * Setup advanced prefetcher initial state
- */
-    PF mov      PF_SRC, SRC
-    PF mov      PF_DST, DST_R
-    PF mov      PF_MASK, MASK
-    /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
-    PF mov      PF_CTL, H, lsl #4
-    PF add      PF_CTL, #(prefetch_distance - 0x10)
-
-    init
-.if regs_shortage
-    push        {r0, r1}
-.endif
-    subs        H, H, #1
-.if regs_shortage
-    str         H, [sp, #4] /* save updated height to stack */
-.else
-    mov         ORIG_W, W
-.endif
-    blt         9f
-    cmp         W, #(pixblock_size * 2)
-    blt         8f
-/*
- * This is the start of the pipelined loop, which if optimized for
- * long scanlines
- */
-0:
-    ensure_destination_ptr_alignment process_pixblock_head, \
-                                     process_pixblock_tail, \
-                                     process_pixblock_tail_head
-
-    /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
-    pixld_a     pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    PF add      PF_X, PF_X, #pixblock_size
-    process_pixblock_head
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-    subs        W, W, #(pixblock_size * 2)
-    blt         2f
-1:
-    process_pixblock_tail_head
-    cache_preload_simple
-    subs        W, W, #pixblock_size
-    bge         1b
-2:
-    process_pixblock_tail
-    pixst_a     pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-
-    /* Process the remaining trailing pixels in the scanline */
-    process_trailing_pixels 1, 1, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-    advance_to_next_scanline 0b
-
-.if regs_shortage
-    pop         {r0, r1}
-.endif
-    cleanup
-    pop         {r4-r12, pc}  /* exit */
-/*
- * This is the start of the loop, designed to process images with small width
- * (less than pixblock_size * 2 pixels). In this case neither pipelining
- * nor prefetch are used.
- */
-8:
-    /* Process exactly pixblock_size pixels if needed */
-    tst         W, #pixblock_size
-    beq         1f
-    pixld       pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    process_pixblock_head
-    process_pixblock_tail
-    pixst       pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-1:
-    /* Process the remaining trailing pixels in the scanline */
-    process_trailing_pixels 0, 0, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-    advance_to_next_scanline 8b
-9:
-.if regs_shortage
-    pop         {r0, r1}
-.endif
-    cleanup
-    pop         {r4-r12, pc}  /* exit */
-
-    .purgem     fetch_src_pixblock
-    .purgem     pixld_src
-
-    .unreq      SRC
-    .unreq      MASK
-    .unreq      DST_R
-    .unreq      DST_W
-    .unreq      ORIG_W
-    .unreq      W
-    .unreq      H
-    .unreq      SRC_STRIDE
-    .unreq      DST_STRIDE
-    .unreq      MASK_STRIDE
-    .unreq      PF_CTL
-    .unreq      PF_X
-    .unreq      PF_SRC
-    .unreq      PF_DST
-    .unreq      PF_MASK
-    .unreq      DUMMY
-    .endfunc
-.endm
-
-/*
- * A simplified variant of function generation template for a single
- * scanline processing (for implementing pixman combine functions)
- */
-.macro generate_composite_function_scanline        use_nearest_scaling, \
-                                                   fname, \
-                                                   src_bpp_, \
-                                                   mask_bpp_, \
-                                                   dst_w_bpp_, \
-                                                   flags, \
-                                                   pixblock_size_, \
-                                                   init, \
-                                                   cleanup, \
-                                                   process_pixblock_head, \
-                                                   process_pixblock_tail, \
-                                                   process_pixblock_tail_head, \
-                                                   dst_w_basereg_ = 28, \
-                                                   dst_r_basereg_ = 4, \
-                                                   src_basereg_   = 0, \
-                                                   mask_basereg_  = 24
-
-    pixman_asm_function fname
-
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
-    .set src_bpp, src_bpp_
-    .set mask_bpp, mask_bpp_
-    .set dst_w_bpp, dst_w_bpp_
-    .set pixblock_size, pixblock_size_
-    .set dst_w_basereg, dst_w_basereg_
-    .set dst_r_basereg, dst_r_basereg_
-    .set src_basereg, src_basereg_
-    .set mask_basereg, mask_basereg_
-
-.if use_nearest_scaling != 0
-    /*
-     * Assign symbolic names to registers for nearest scaling
-     */
-    W           .req        r0
-    DST_W       .req        r1
-    SRC         .req        r2
-    VX          .req        r3
-    UNIT_X      .req        ip
-    MASK        .req        lr
-    TMP1        .req        r4
-    TMP2        .req        r5
-    DST_R       .req        r6
-    SRC_WIDTH_FIXED .req        r7
-
-    .macro pixld_src x:vararg
-        pixld_s x
-    .endm
-
-    ldr         UNIT_X, [sp]
-    push        {r4-r8, lr}
-    ldr         SRC_WIDTH_FIXED, [sp, #(24 + 4)]
-    .if mask_bpp != 0
-    ldr         MASK, [sp, #(24 + 8)]
-    .endif
-.else
-    /*
-     * Assign symbolic names to registers
-     */
-    W           .req        r0      /* width (is updated during processing) */
-    DST_W       .req        r1      /* destination buffer pointer for writes */
-    SRC         .req        r2      /* source buffer pointer */
-    DST_R       .req        ip      /* destination buffer pointer for reads */
-    MASK        .req        r3      /* mask pointer */
-
-    .macro pixld_src x:vararg
-        pixld x
-    .endm
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
-    .set dst_r_bpp, dst_w_bpp
-.else
-    .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
-    .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
-    .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
-    .macro fetch_src_pixblock
-        pixld_src   pixblock_size, src_bpp, \
-                    (src_basereg - pixblock_size * src_bpp / 64), SRC
-    .endm
-
-    init
-    mov         DST_R, DST_W
-
-    cmp         W, #pixblock_size
-    blt         8f
-
-    ensure_destination_ptr_alignment process_pixblock_head, \
-                                     process_pixblock_tail, \
-                                     process_pixblock_tail_head
-
-    subs        W, W, #pixblock_size
-    blt         7f
-
-    /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
-    pixld_a     pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    process_pixblock_head
-    subs        W, W, #pixblock_size
-    blt         2f
-1:
-    process_pixblock_tail_head
-    subs        W, W, #pixblock_size
-    bge         1b
-2:
-    process_pixblock_tail
-    pixst_a     pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-7:
-    /* Process the remaining trailing pixels in the scanline (dst aligned) */
-    process_trailing_pixels 0, 1, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-
-    cleanup
-.if use_nearest_scaling != 0
-    pop         {r4-r8, pc}  /* exit */
-.else
-    bx          lr  /* exit */
-.endif
-8:
-    /* Process the remaining trailing pixels in the scanline (dst unaligned) */
-    process_trailing_pixels 0, 0, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-
-    cleanup
-
-.if use_nearest_scaling != 0
-    pop         {r4-r8, pc}  /* exit */
-
-    .unreq      DST_R
-    .unreq      SRC
-    .unreq      W
-    .unreq      VX
-    .unreq      UNIT_X
-    .unreq      TMP1
-    .unreq      TMP2
-    .unreq      DST_W
-    .unreq      MASK
-    .unreq      SRC_WIDTH_FIXED
-
-.else
-    bx          lr  /* exit */
-
-    .unreq      SRC
-    .unreq      MASK
-    .unreq      DST_R
-    .unreq      DST_W
-    .unreq      W
-.endif
-
-    .purgem     fetch_src_pixblock
-    .purgem     pixld_src
-
-    .endfunc
-.endm
-
-.macro generate_composite_function_single_scanline x:vararg
-    generate_composite_function_scanline 0, x
-.endm
-
-.macro generate_composite_function_nearest_scanline x:vararg
-    generate_composite_function_scanline 1, x
-.endm
-
-/* Default prologue/epilogue, nothing special needs to be done */
-
-.macro default_init
-.endm
-
-.macro default_cleanup
-.endm
-
-/*
- * Prologue/epilogue variant which additionally saves/restores d8-d15
- * registers (they need to be saved/restored by callee according to ABI).
- * This is required if the code needs to use all the NEON registers.
- */
-
-.macro default_init_need_all_regs
-    vpush       {d8-d15}
-.endm
-
-.macro default_cleanup_need_all_regs
-    vpop        {d8-d15}
-.endm
-
-/******************************************************************************/
-
-/*
- * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in)
- * into a planar a8r8g8b8 format (with a, r, g, b color components
- * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
- *
- * Warning: the conversion is destructive and the original
- *          value (in) is lost.
- */
-.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
-    vshrn.u16   out_r, in,    #8
-    vshrn.u16   out_g, in,    #3
-    vsli.u16    in,    in,    #5
-    vmov.u8     out_a, #255
-    vsri.u8     out_r, out_r, #5
-    vsri.u8     out_g, out_g, #6
-    vshrn.u16   out_b, in,    #2
-.endm
-
-.macro convert_0565_to_x888 in, out_r, out_g, out_b
-    vshrn.u16   out_r, in,    #8
-    vshrn.u16   out_g, in,    #3
-    vsli.u16    in,    in,    #5
-    vsri.u8     out_r, out_r, #5
-    vsri.u8     out_g, out_g, #6
-    vshrn.u16   out_b, in,    #2
-.endm
-
-/*
- * Conversion from planar a8r8g8b8 format (with a, r, g, b color components
- * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6
- * pixels packed in 128-bit register (out). Requires two temporary 128-bit
- * registers (tmp1, tmp2)
- */
-.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
-    vshll.u8    tmp1, in_g, #8
-    vshll.u8    out, in_r, #8
-    vshll.u8    tmp2, in_b, #8
-    vsri.u16    out, tmp1, #5
-    vsri.u16    out, tmp2, #11
-.endm
-
-/*
- * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
- * returned in (out0, out1) registers pair. Requires one temporary
- * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
- * value from 'in' is lost
- */
-.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
-    vshl.u16    out0, in,   #5  /* G top 6 bits */
-    vshl.u16    tmp,  in,   #11 /* B top 5 bits */
-    vsri.u16    in,   in,   #5  /* R is ready in top bits */
-    vsri.u16    out0, out0, #6  /* G is ready in top bits */
-    vsri.u16    tmp,  tmp,  #5  /* B is ready in top bits */
-    vshr.u16    out1, in,   #8  /* R is in place */
-    vsri.u16    out0, tmp,  #8  /* G & B is in place */
-    vzip.u16    out0, out1      /* everything is in place */
-.endm
diff --git a/vendor/pixman/pixman/pixman-arm-neon.c b/vendor/pixman/pixman/pixman-arm-neon.c
deleted file mode 100644
index 103f1c2db..000000000
--- a/vendor/pixman/pixman/pixman-arm-neon.c
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of ARM Ltd not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  ARM Ltd makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Ian Rickards (ian.rickards@arm.com)
- * Author:  Jonathan Morton (jonathan.morton@movial.com)
- * Author:  Markku Vire (markku.vire@movial.com)
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
-                                   uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
-                                   uint8_t, 3, uint8_t, 3)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
-                                   uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
-                                   uint16_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
-                                   uint8_t, 3, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
-                                   uint8_t, 3, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
-                                   uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
-                                   uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
-                                   uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888,
-                                   uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
-                                 uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
-                                 uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
-                                 uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
-                                 uint8_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
-                                      uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
-                                      uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
-                                      uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
-				      uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
-                                      uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
-                                      uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
-                                      uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888,
-                                      uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8,
-                                      uint8_t, 1, uint8_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
-                                     uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
-                                     uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
-                                     uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
-                                     uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
-                                        uint8_t, 1, uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
-                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
-                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
-                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
-                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
-                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
-                                        uint32_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
-                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
-                                        uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
-                                        uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
-                                        uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
-                                        uint16_t, uint32_t)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
-                                           OVER, uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
-                                           OVER, uint16_t, uint16_t)
-
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
-                                         uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
-                                         uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
-                                         uint16_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
-                                         uint16_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
-                                         uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
-                                         uint32_t, uint32_t)
-
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
-                                            uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
-                                            uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
-                                            uint16_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
-                                            uint16_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
-                                            uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
-                                            uint32_t, uint32_t)
-
-void
-pixman_composite_src_n_8_asm_neon (int32_t   w,
-                                   int32_t   h,
-                                   uint8_t  *dst,
-                                   int32_t   dst_stride,
-                                   uint8_t   src);
-
-void
-pixman_composite_src_n_0565_asm_neon (int32_t   w,
-                                      int32_t   h,
-                                      uint16_t *dst,
-                                      int32_t   dst_stride,
-                                      uint16_t  src);
-
-void
-pixman_composite_src_n_8888_asm_neon (int32_t   w,
-                                      int32_t   h,
-                                      uint32_t *dst,
-                                      int32_t   dst_stride,
-                                      uint32_t  src);
-
-static pixman_bool_t
-arm_neon_fill (pixman_implementation_t *imp,
-               uint32_t *               bits,
-               int                      stride,
-               int                      bpp,
-               int                      x,
-               int                      y,
-               int                      width,
-               int                      height,
-	       uint32_t                 _xor)
-{
-    /* stride is always multiple of 32bit units in pixman */
-    int32_t byte_stride = stride * sizeof(uint32_t);
-
-    switch (bpp)
-    {
-    case 8:
-	pixman_composite_src_n_8_asm_neon (
-		width,
-		height,
-		(uint8_t *)(((char *) bits) + y * byte_stride + x),
-		byte_stride,
-		_xor & 0xff);
-	return TRUE;
-    case 16:
-	pixman_composite_src_n_0565_asm_neon (
-		width,
-		height,
-		(uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
-		byte_stride / 2,
-		_xor & 0xffff);
-	return TRUE;
-    case 32:
-	pixman_composite_src_n_8888_asm_neon (
-		width,
-		height,
-		(uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
-		byte_stride / 4,
-		_xor);
-	return TRUE;
-    default:
-	return FALSE;
-    }
-}
-
-static pixman_bool_t
-arm_neon_blt (pixman_implementation_t *imp,
-              uint32_t *               src_bits,
-              uint32_t *               dst_bits,
-              int                      src_stride,
-              int                      dst_stride,
-              int                      src_bpp,
-              int                      dst_bpp,
-              int                      src_x,
-              int                      src_y,
-              int                      dest_x,
-              int                      dest_y,
-              int                      width,
-              int                      height)
-{
-    if (src_bpp != dst_bpp)
-	return FALSE;
-
-    switch (src_bpp)
-    {
-    case 16:
-	pixman_composite_src_0565_0565_asm_neon (
-		width, height,
-		(uint16_t *)(((char *) dst_bits) +
-		dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
-		(uint16_t *)(((char *) src_bits) +
-		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
-	return TRUE;
-    case 32:
-	pixman_composite_src_8888_8888_asm_neon (
-		width, height,
-		(uint32_t *)(((char *) dst_bits) +
-		dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
-		(uint32_t *)(((char *) src_bits) +
-		src_y * src_stride * 4 + src_x * 4), src_stride);
-	return TRUE;
-    default:
-	return FALSE;
-    }
-}
-
-static const pixman_fast_path_t arm_neon_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
-    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
-    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
-    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8r8g8b8, neon_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8r8g8b8, neon_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8b8g8r8, neon_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8b8g8r8, neon_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8,       neon_composite_src_n_8_8),
-
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   neon_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   neon_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_over_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       x8r8g8b8, neon_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       x8b8g8r8, neon_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (ADD,  x8r8g8b8, a8,       x8r8g8b8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       x8r8g8b8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  x8b8g8r8, a8,       x8b8g8r8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       x8b8g8r8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  x8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_add_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_add_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  x8r8g8b8, solid,    x8r8g8b8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    x8r8g8b8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  x8b8g8r8, solid,    x8b8g8r8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    x8b8g8r8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  x8r8g8b8, null,     x8r8g8b8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     x8r8g8b8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  x8b8g8r8, null,     x8b8g8r8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     x8b8g8r8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, x8r8g8b8, neon_composite_out_reverse_8_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8r8g8b8, neon_composite_out_reverse_8_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, x8b8g8r8, neon_composite_out_reverse_8_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8b8g8r8, neon_composite_out_reverse_8_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
-    /* Note: NONE repeat is not supported yet */
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
-
-    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
-
-    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
-    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (ADD, x8r8g8b8, x8r8g8b8, neon_8888_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
-
-    { PIXMAN_OP_NONE },
-};
-
-#define BIND_COMBINE_U(name)                                             \
-void                                                                     \
-pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
-                                                  const uint32_t *dst,   \
-                                                  const uint32_t *src,   \
-                                                  const uint32_t *mask); \
-                                                                         \
-void                                                                     \
-pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
-                                             const uint32_t *dst,        \
-                                             const uint32_t *src);       \
-                                                                         \
-static void                                                              \
-neon_combine_##name##_u (pixman_implementation_t *imp,                   \
-                         pixman_op_t              op,                    \
-                         uint32_t *               dest,                  \
-                         const uint32_t *         src,                   \
-                         const uint32_t *         mask,                  \
-                         int                      width)                 \
-{                                                                        \
-    if (mask)                                                            \
-	pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
-	                                                  src, mask);    \
-    else                                                                 \
-	pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
-}
-
-BIND_COMBINE_U (over)
-BIND_COMBINE_U (add)
-BIND_COMBINE_U (out_reverse)
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp =
-	_pixman_implementation_create (fallback, arm_neon_fast_paths);
-
-    imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
-    imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
-
-    imp->blt = arm_neon_blt;
-    imp->fill = arm_neon_fill;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S b/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S
deleted file mode 100644
index e050292e0..000000000
--- a/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright © 2008 Mozilla Corporation
- * Copyright © 2010 Nokia Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-
-/* Prevent the stack from becoming executable */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-	.text
-	.arch armv6
-	.object_arch armv4
-	.arm
-	.altmacro
-	.p2align 2
-
-#include "pixman-arm-asm.h"
-
-/*
- * Note: This code is only using armv5te instructions (not even armv6),
- *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
- *       be split into a few variants, tuned for each microarchitecture.
- *
- * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
- * have efficient write combining), it needs to be changed to use 16-byte
- * aligned writes using STM instruction.
- *
- * Nearest scanline scaler macro template uses the following arguments:
- *  fname                     - name of the function to generate
- *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
- *  t                         - type suffix for LDR/STR instructions
- *  prefetch_distance         - prefetch in the source image by that many
- *                              pixels ahead
- *  prefetch_braking_distance - stop prefetching when that many pixels are
- *                              remaining before the end of scanline
- */
-
-.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
-                                      prefetch_distance,        \
-                                      prefetch_braking_distance
-
-pixman_asm_function fname
-	W		.req	r0
-	DST		.req	r1
-	SRC		.req	r2
-	VX		.req	r3
-	UNIT_X		.req	ip
-	TMP1		.req	r4
-	TMP2		.req	r5
-	VXMASK		.req	r6
-	PF_OFFS		.req	r7
-	SRC_WIDTH_FIXED	.req	r8
-
-	ldr	UNIT_X, [sp]
-	push	{r4, r5, r6, r7, r8, r10}
-	mvn	VXMASK, #((1 << bpp_shift) - 1)
-	ldr	SRC_WIDTH_FIXED, [sp, #28]
-
-	/* define helper macro */
-	.macro	scale_2_pixels
-		ldr&t	TMP1, [SRC, TMP1]
-		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
-		adds	VX, VX, UNIT_X
-		str&t	TMP1, [DST], #(1 << bpp_shift)
-9:		subpls	VX, VX, SRC_WIDTH_FIXED
-		bpl	9b
-
-		ldr&t	TMP2, [SRC, TMP2]
-		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
-		adds	VX, VX, UNIT_X
-		str&t	TMP2, [DST], #(1 << bpp_shift)
-9:		subpls	VX, VX, SRC_WIDTH_FIXED
-		bpl	9b
-	.endm
-
-	/* now do the scaling */
-	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
-	adds	VX, VX, UNIT_X
-9:	subpls	VX, VX, SRC_WIDTH_FIXED
-	bpl	9b
-	subs	W, W, #(8 + prefetch_braking_distance)
-	blt	2f
-	/* calculate prefetch offset */
-	mov	PF_OFFS, #prefetch_distance
-	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
-1:	/* main loop, process 8 pixels per iteration with prefetch */
-	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
-	add	PF_OFFS, UNIT_X, lsl #3
-	scale_2_pixels
-	scale_2_pixels
-	scale_2_pixels
-	scale_2_pixels
-	subs	W, W, #8
-	bge	1b
-2:
-	subs	W, W, #(4 - 8 - prefetch_braking_distance)
-	blt	2f
-1:	/* process the remaining pixels */
-	scale_2_pixels
-	scale_2_pixels
-	subs	W, W, #4
-	bge	1b
-2:
-	tst	W, #2
-	beq	2f
-	scale_2_pixels
-2:
-	tst	W, #1
-	ldrne&t	TMP1, [SRC, TMP1]
-	strne&t	TMP1, [DST]
-	/* cleanup helper macro */
-	.purgem	scale_2_pixels
-	.unreq	DST
-	.unreq	SRC
-	.unreq	W
-	.unreq	VX
-	.unreq	UNIT_X
-	.unreq	TMP1
-	.unreq	TMP2
-	.unreq	VXMASK
-	.unreq	PF_OFFS
-	.unreq  SRC_WIDTH_FIXED
-	/* return */
-	pop	{r4, r5, r6, r7, r8, r10}
-	bx	lr
-.endfunc
-.endm
-
-generate_nearest_scanline_func \
-    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
-
-generate_nearest_scanline_func \
-    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm.S b/vendor/pixman/pixman/pixman-arm-simd-asm.S
deleted file mode 100644
index a74a0a8f3..000000000
--- a/vendor/pixman/pixman/pixman-arm-simd-asm.S
+++ /dev/null
@@ -1,1179 +0,0 @@
-/*
- * Copyright © 2012 Raspberry Pi Foundation
- * Copyright © 2012 RISC OS Open Ltd
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of the copyright holders not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  The copyright holders make no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Ben Avison (bavison@riscosopen.org)
- *
- */
-
-/* Prevent the stack from becoming executable */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-	.text
-	.arch armv6
-	.object_arch armv4
-	.arm
-	.altmacro
-	.p2align 2
-
-#include "pixman-arm-asm.h"
-#include "pixman-arm-simd-asm.h"
-
-/* A head macro should do all processing which results in an output of up to
- * 16 bytes, as far as the final load instruction. The corresponding tail macro
- * should complete the processing of the up-to-16 bytes. The calling macro will
- * sometimes choose to insert a preload or a decrement of X between them.
- *   cond           ARM condition code for code block
- *   numbytes       Number of output bytes that should be generated this time
- *   firstreg       First WK register in which to place output
- *   unaligned_src  Whether to use non-wordaligned loads of source image
- *   unaligned_mask Whether to use non-wordaligned loads of mask image
- *   preload        If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
- */
-
-.macro blit_init
-        line_saved_regs STRIDE_D, STRIDE_S
-.endm
-
-.macro blit_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        pixld   cond, numbytes, firstreg, SRC, unaligned_src
-.endm
-
-.macro blit_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
-    WK4     .req    STRIDE_D
-    WK5     .req    STRIDE_S
-    WK6     .req    MASK
-    WK7     .req    STRIDE_M
-110:    pixld   , 16, 0, SRC, unaligned_src
-        pixld   , 16, 4, SRC, unaligned_src
-        pld     [SRC, SCRATCH]
-        pixst   , 16, 0, DST
-        pixst   , 16, 4, DST
-        subs    X, X, #32*8/src_bpp
-        bhs     110b
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-generate_composite_function \
-    pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
-    4, /* prefetch distance */ \
-    blit_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    blit_process_head, \
-    nop_macro, /* process tail */ \
-    blit_inner_loop
-
-generate_composite_function \
-    pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
-    4, /* prefetch distance */ \
-    blit_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    blit_process_head, \
-    nop_macro, /* process tail */ \
-    blit_inner_loop
-
-generate_composite_function \
-    pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
-    3, /* prefetch distance */ \
-    blit_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    blit_process_head, \
-    nop_macro, /* process tail */ \
-    blit_inner_loop
-
-/******************************************************************************/
-
-.macro src_n_8888_init
-        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
-        mov     STRIDE_S, SRC
-        mov     MASK, SRC
-        mov     STRIDE_M, SRC
-.endm
-
-.macro src_n_0565_init
-        ldrh    SRC, [sp, #ARGS_STACK_OFFSET]
-        orr     SRC, SRC, lsl #16
-        mov     STRIDE_S, SRC
-        mov     MASK, SRC
-        mov     STRIDE_M, SRC
-.endm
-
-.macro src_n_8_init
-        ldrb    SRC, [sp, #ARGS_STACK_OFFSET]
-        orr     SRC, SRC, lsl #8
-        orr     SRC, SRC, lsl #16
-        mov     STRIDE_S, SRC
-        mov     MASK, SRC
-        mov     STRIDE_M, SRC
-.endm
-
-.macro fill_process_tail  cond, numbytes, firstreg
-    WK4     .req    SRC
-    WK5     .req    STRIDE_S
-    WK6     .req    MASK
-    WK7     .req    STRIDE_M
-        pixst   cond, numbytes, 4, DST
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
-    0, /* prefetch distance doesn't apply */ \
-    src_n_8888_init \
-    nop_macro, /* newline */ \
-    nop_macro /* cleanup */ \
-    nop_macro /* process head */ \
-    fill_process_tail
-
-generate_composite_function \
-    pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
-    0, /* prefetch distance doesn't apply */ \
-    src_n_0565_init \
-    nop_macro, /* newline */ \
-    nop_macro /* cleanup */ \
-    nop_macro /* process head */ \
-    fill_process_tail
-
-generate_composite_function \
-    pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
-    0, /* prefetch distance doesn't apply */ \
-    src_n_8_init \
-    nop_macro, /* newline */ \
-    nop_macro /* cleanup */ \
-    nop_macro /* process head */ \
-    fill_process_tail
-
-/******************************************************************************/
-
-.macro src_x888_8888_pixel, cond, reg
-        orr&cond WK&reg, WK&reg, #0xFF000000
-.endm
-
-.macro pixman_composite_src_x888_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        pixld   cond, numbytes, firstreg, SRC, unaligned_src
-.endm
-
-.macro pixman_composite_src_x888_8888_process_tail   cond, numbytes, firstreg
-        src_x888_8888_pixel cond, %(firstreg+0)
- .if numbytes >= 8
-        src_x888_8888_pixel cond, %(firstreg+1)
-  .if numbytes == 16
-        src_x888_8888_pixel cond, %(firstreg+2)
-        src_x888_8888_pixel cond, %(firstreg+3)
-  .endif
- .endif
-.endm
-
-generate_composite_function \
-    pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
-    3, /* prefetch distance */ \
-    nop_macro, /* init */ \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    pixman_composite_src_x888_8888_process_head, \
-    pixman_composite_src_x888_8888_process_tail
-
-/******************************************************************************/
-
-.macro src_0565_8888_init
-        /* Hold loop invariants in MASK and STRIDE_M */
-        ldr     MASK, =0x07E007E0
-        mov     STRIDE_M, #0xFF000000
-        /* Set GE[3:0] to 1010 so SEL instructions do what we want */
-        ldr     SCRATCH, =0x80008000
-        uadd8   SCRATCH, SCRATCH, SCRATCH
-.endm
-
-.macro src_0565_8888_2pixels, reg1, reg2
-        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
-        bic     WK&reg2, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
-        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
-        mov     WK&reg1, WK&reg2, lsl #16          @ rrrrr000000bbbbb0000000000000000
-        mov     SCRATCH, SCRATCH, ror #19          @ GGGG0000ggggggggggg00000GGGGGGGG
-        bic     WK&reg2, WK&reg2, WK&reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
-        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ rrrrrrrrrr0bbbbbbbbbb00000000000
-        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ RRRRRRRRRR0BBBBBBBBBB00000000000
-        pkhtb   WK&reg1, WK&reg1, WK&reg1, asr #5  @ rrrrrrrr--------bbbbbbbb--------
-        sel     WK&reg1, WK&reg1, SCRATCH          @ rrrrrrrrggggggggbbbbbbbb--------
-        mov     SCRATCH, SCRATCH, ror #16          @ ggg00000GGGGGGGGGGGG0000gggggggg
-        pkhtb   WK&reg2, WK&reg2, WK&reg2, asr #5  @ RRRRRRRR--------BBBBBBBB--------
-        sel     WK&reg2, WK&reg2, SCRATCH          @ RRRRRRRRGGGGGGGGBBBBBBBB--------
-        orr     WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
-        orr     WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
-.endm
-
-/* This version doesn't need STRIDE_M, but is one instruction longer.
-   It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case?
-        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
-        bic     WK&reg1, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
-        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
-        mov     WK&reg2, WK&reg1, lsr #16          @ 0000000000000000RRRRR000000BBBBB
-        mov     SCRATCH, SCRATCH, ror #27          @ GGGGGGGGGGGG0000ggggggggggg00000
-        bic     WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
-        mov     WK&reg2, WK&reg2, lsl #3           @ 0000000000000RRRRR000000BBBBB000
-        mov     WK&reg1, WK&reg1, lsl #3           @ 0000000000000rrrrr000000bbbbb000
-        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ 0000000000000RRRRRRRRRR0BBBBBBBB
-        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
-        pkhbt   WK&reg2, WK&reg2, WK&reg2, lsl #5  @ --------RRRRRRRR--------BBBBBBBB
-        pkhbt   WK&reg1, WK&reg1, WK&reg1, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
-        sel     WK&reg2, SCRATCH, WK&reg2          @ --------RRRRRRRRGGGGGGGGBBBBBBBB
-        sel     WK&reg1, SCRATCH, WK&reg1          @ --------rrrrrrrrggggggggbbbbbbbb
-        orr     WK&reg2, WK&reg2, #0xFF000000      @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
-        orr     WK&reg1, WK&reg1, #0xFF000000      @ 11111111rrrrrrrrggggggggbbbbbbbb
-*/
-
-.macro src_0565_8888_1pixel, reg
-        bic     SCRATCH, WK&reg, MASK              @ 0000000000000000rrrrr000000bbbbb
-        and     WK&reg, WK&reg, MASK               @ 000000000000000000000gggggg00000
-        mov     SCRATCH, SCRATCH, lsl #3           @ 0000000000000rrrrr000000bbbbb000
-        mov     WK&reg, WK&reg, lsl #5             @ 0000000000000000gggggg0000000000
-        orr     SCRATCH, SCRATCH, SCRATCH, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
-        orr     WK&reg, WK&reg, WK&reg, lsr #6     @ 000000000000000gggggggggggg00000
-        pkhbt   SCRATCH, SCRATCH, SCRATCH, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
-        sel     WK&reg, WK&reg, SCRATCH            @ --------rrrrrrrrggggggggbbbbbbbb
-        orr     WK&reg, WK&reg, #0xFF000000        @ 11111111rrrrrrrrggggggggbbbbbbbb
-.endm
-
-.macro src_0565_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .if numbytes == 16
-        pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
- .elseif numbytes == 8
-        pixld   , 4, firstreg, SRC, unaligned_src
- .elseif numbytes == 4
-        pixld   , 2, firstreg, SRC, unaligned_src
- .endif
-.endm
-
-.macro src_0565_8888_process_tail   cond, numbytes, firstreg
- .if numbytes == 16
-        src_0565_8888_2pixels firstreg, %(firstreg+1)
-        src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
- .elseif numbytes == 8
-        src_0565_8888_2pixels firstreg, %(firstreg+1)
- .else
-        src_0565_8888_1pixel firstreg
- .endif
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
-    3, /* prefetch distance */ \
-    src_0565_8888_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    src_0565_8888_process_head, \
-    src_0565_8888_process_tail
-
-/******************************************************************************/
-
-.macro src_x888_0565_init
-        /* Hold loop invariant in MASK */
-        ldr     MASK, =0x001F001F
-        line_saved_regs  STRIDE_S, ORIG_W
-.endm
-
-.macro src_x888_0565_1pixel  s, d
-        and     WK&d, MASK, WK&s, lsr #3           @ 00000000000rrrrr00000000000bbbbb
-        and     STRIDE_S, WK&s, #0xFC00            @ 0000000000000000gggggg0000000000
-        orr     WK&d, WK&d, WK&d, lsr #5           @ 00000000000-----rrrrr000000bbbbb
-        orr     WK&d, WK&d, STRIDE_S, lsr #5       @ 00000000000-----rrrrrggggggbbbbb
-        /* Top 16 bits are discarded during the following STRH */
-.endm
-
-.macro src_x888_0565_2pixels  slo, shi, d, tmp
-        and     SCRATCH, WK&shi, #0xFC00           @ 0000000000000000GGGGGG0000000000
-        and     WK&tmp, MASK, WK&shi, lsr #3       @ 00000000000RRRRR00000000000BBBBB
-        and     WK&shi, MASK, WK&slo, lsr #3       @ 00000000000rrrrr00000000000bbbbb
-        orr     WK&tmp, WK&tmp, WK&tmp, lsr #5     @ 00000000000-----RRRRR000000BBBBB
-        orr     WK&tmp, WK&tmp, SCRATCH, lsr #5    @ 00000000000-----RRRRRGGGGGGBBBBB
-        and     SCRATCH, WK&slo, #0xFC00           @ 0000000000000000gggggg0000000000
-        orr     WK&shi, WK&shi, WK&shi, lsr #5     @ 00000000000-----rrrrr000000bbbbb
-        orr     WK&shi, WK&shi, SCRATCH, lsr #5    @ 00000000000-----rrrrrggggggbbbbb
-        pkhbt   WK&d, WK&shi, WK&tmp, lsl #16      @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
-.endm
-
-.macro src_x888_0565_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        WK4     .req    STRIDE_S
-        WK5     .req    STRIDE_M
-        WK6     .req    WK3
-        WK7     .req    ORIG_W
- .if numbytes == 16
-        pixld   , 16, 4, SRC, 0
-        src_x888_0565_2pixels  4, 5, 0, 0
-        pixld   , 8, 4, SRC, 0
-        src_x888_0565_2pixels  6, 7, 1, 1
-        pixld   , 8, 6, SRC, 0
- .else
-        pixld   , numbytes*2, 4, SRC, 0
- .endif
-.endm
-
-.macro src_x888_0565_process_tail   cond, numbytes, firstreg
- .if numbytes == 16
-        src_x888_0565_2pixels  4, 5, 2, 2
-        src_x888_0565_2pixels  6, 7, 3, 4
- .elseif numbytes == 8
-        src_x888_0565_2pixels  4, 5, 1, 1
-        src_x888_0565_2pixels  6, 7, 2, 2
- .elseif numbytes == 4
-        src_x888_0565_2pixels  4, 5, 1, 1
- .else
-        src_x888_0565_1pixel  4, 1
- .endif
- .if numbytes == 16
-        pixst   , numbytes, 0, DST
- .else
-        pixst   , numbytes, 1, DST
- .endif
-        .unreq  WK4
-        .unreq  WK5
-        .unreq  WK6
-        .unreq  WK7
-.endm
-
-generate_composite_function \
-    pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
-    3, /* prefetch distance */ \
-    src_x888_0565_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    src_x888_0565_process_head, \
-    src_x888_0565_process_tail
-
-/******************************************************************************/
-
-.macro add_8_8_8pixels  cond, dst1, dst2
-        uqadd8&cond  WK&dst1, WK&dst1, MASK
-        uqadd8&cond  WK&dst2, WK&dst2, STRIDE_M
-.endm
-
-.macro add_8_8_4pixels  cond, dst
-        uqadd8&cond  WK&dst, WK&dst, MASK
-.endm
-
-.macro add_8_8_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-    WK4     .req    MASK
-    WK5     .req    STRIDE_M
- .if numbytes == 16
-        pixld   cond, 8, 4, SRC, unaligned_src
-        pixld   cond, 16, firstreg, DST, 0
-        add_8_8_8pixels cond, firstreg, %(firstreg+1)
-        pixld   cond, 8, 4, SRC, unaligned_src
- .else
-        pixld   cond, numbytes, 4, SRC, unaligned_src
-        pixld   cond, numbytes, firstreg, DST, 0
- .endif
-    .unreq  WK4
-    .unreq  WK5
-.endm
-
-.macro add_8_8_process_tail  cond, numbytes, firstreg
- .if numbytes == 16
-        add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
- .elseif numbytes == 8
-        add_8_8_8pixels cond, firstreg, %(firstreg+1)
- .else
-        add_8_8_4pixels cond, firstreg
- .endif
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \
-    2, /* prefetch distance */ \
-    nop_macro, /* init */ \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    add_8_8_process_head, \
-    add_8_8_process_tail
-
-/******************************************************************************/
-
-.macro over_8888_8888_init
-        /* Hold loop invariant in MASK */
-        ldr     MASK, =0x00800080
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, MASK, MASK
-        line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
-.endm
-
-.macro over_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-    WK4     .req    STRIDE_D
-    WK5     .req    STRIDE_S
-    WK6     .req    STRIDE_M
-    WK7     .req    ORIG_W
-        pixld   , numbytes, %(4+firstreg), SRC, unaligned_src
-        pixld   , numbytes, firstreg, DST, 0
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-.macro over_8888_8888_check_transparent  numbytes, reg0, reg1, reg2, reg3
-        /* Since these colours a premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
-        teq     WK&reg0, #0
- .if numbytes > 4
-        teqeq   WK&reg1, #0
-  .if numbytes > 8
-        teqeq   WK&reg2, #0
-        teqeq   WK&reg3, #0
-  .endif
- .endif
-.endm
-
-.macro over_8888_8888_prepare  next
-        mov     WK&next, WK&next, lsr #24
-.endm
-
-.macro over_8888_8888_1pixel src, dst, offset, next
-        /* src = destination component multiplier */
-        rsb     WK&src, WK&src, #255
-        /* Split even/odd bytes of dst into SCRATCH/dst */
-        uxtb16  SCRATCH, WK&dst
-        uxtb16  WK&dst, WK&dst, ror #8
-        /* Multiply through, adding 0.5 to the upper byte of result for rounding */
-        mla     SCRATCH, SCRATCH, WK&src, MASK
-        mla     WK&dst, WK&dst, WK&src, MASK
-        /* Where we would have had a stall between the result of the first MLA and the shifter input,
-         * reload the complete source pixel */
-        ldr     WK&src, [SRC, #offset]
-        /* Multiply by 257/256 to approximate 256/255 */
-        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
-        /* In this stall, start processing the next pixel */
- .if offset < -4
-        mov     WK&next, WK&next, lsr #24
- .endif
-        uxtab16 WK&dst, WK&dst, WK&dst, ror #8
-        /* Recombine even/odd bytes of multiplied destination */
-        mov     SCRATCH, SCRATCH, ror #8
-        sel     WK&dst, SCRATCH, WK&dst
-        /* Saturated add of source to multiplied destination */
-        uqadd8  WK&dst, WK&dst, WK&src
-.endm
-
-.macro over_8888_8888_process_tail  cond, numbytes, firstreg
-    WK4     .req    STRIDE_D
-    WK5     .req    STRIDE_S
-    WK6     .req    STRIDE_M
-    WK7     .req    ORIG_W
-        over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
-        beq     10f
-        over_8888_8888_prepare  %(4+firstreg)
- .set PROCESS_REG, firstreg
- .set PROCESS_OFF, -numbytes
- .rept numbytes / 4
-        over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
-  .set PROCESS_REG, PROCESS_REG+1
-  .set PROCESS_OFF, PROCESS_OFF+4
- .endr
-        pixst   , numbytes, firstreg, DST
-10:
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
-    2, /* prefetch distance */ \
-    over_8888_8888_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    over_8888_8888_process_head, \
-    over_8888_8888_process_tail
-
-/******************************************************************************/
-
-/* Multiply each byte of a word by a byte.
- * Useful when there aren't any obvious ways to fill the stalls with other instructions.
- * word  Register containing 4 bytes
- * byte  Register containing byte multiplier (bits 8-31 must be 0)
- * tmp   Scratch register
- * half  Register containing the constant 0x00800080
- * GE[3:0] bits must contain 0101
- */
-.macro mul_8888_8  word, byte, tmp, half
-        /* Split even/odd bytes of word apart */
-        uxtb16  tmp, word
-        uxtb16  word, word, ror #8
-        /* Multiply bytes together with rounding, then by 257/256 */
-        mla     tmp, tmp, byte, half
-        mla     word, word, byte, half /* 1 stall follows */
-        uxtab16 tmp, tmp, tmp, ror #8  /* 1 stall follows */
-        uxtab16 word, word, word, ror #8
-        /* Recombine bytes */
-        mov     tmp, tmp, ror #8
-        sel     word, tmp, word
-.endm
-
-/******************************************************************************/
-
-.macro over_8888_n_8888_init
-        /* Mask is constant */
-        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8]
-        /* Hold loop invariant in STRIDE_M */
-        ldr     STRIDE_M, =0x00800080
-        /* We only want the alpha bits of the constant mask */
-        mov     MASK, MASK, lsr #24
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, STRIDE_M, STRIDE_M
-        line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W
-.endm
-
-.macro over_8888_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-    WK4     .req    Y
-    WK5     .req    STRIDE_D
-    WK6     .req    STRIDE_S
-    WK7     .req    ORIG_W
-        pixld   , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
-        pixld   , numbytes, firstreg, DST, 0
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-.macro over_8888_n_8888_1pixel src, dst
-        mul_8888_8  WK&src, MASK, SCRATCH, STRIDE_M
-        sub     WK7, WK6, WK&src, lsr #24
-        mul_8888_8  WK&dst, WK7, SCRATCH, STRIDE_M
-        uqadd8  WK&dst, WK&dst, WK&src
-.endm
-
-.macro over_8888_n_8888_process_tail  cond, numbytes, firstreg
-    WK4     .req    Y
-    WK5     .req    STRIDE_D
-    WK6     .req    STRIDE_S
-    WK7     .req    ORIG_W
-        over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
-        beq     10f
-        mov     WK6, #255
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
-  .if numbytes == 16 && PROCESS_REG == 2
-        /* We're using WK6 and WK7 as temporaries, so half way through
-         * 4 pixels, reload the second two source pixels but this time
-         * into WK4 and WK5 */
-        ldmdb   SRC, {WK4, WK5}
-  .endif
-        over_8888_n_8888_1pixel  %(4+(PROCESS_REG%2)), %(PROCESS_REG)
-  .set PROCESS_REG, PROCESS_REG+1
- .endr
-        pixst   , numbytes, firstreg, DST
-10:
-    .unreq  WK4
-    .unreq  WK5
-    .unreq  WK6
-    .unreq  WK7
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
-    2, /* prefetch distance */ \
-    over_8888_n_8888_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    over_8888_n_8888_process_head, \
-    over_8888_n_8888_process_tail
-
-/******************************************************************************/
-
-.macro over_n_8_8888_init
-        /* Source is constant, but splitting it into even/odd bytes is a loop invariant */
-        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
-        /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */
-        ldr     SCRATCH, =0x00800080
-        uxtb16  STRIDE_S, SRC
-        uxtb16  SRC, SRC, ror #8
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, SCRATCH, SCRATCH
-        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
-.endm
-
-.macro over_n_8_8888_newline
-        ldr     STRIDE_D, =0x00800080
-        b       1f
- .ltorg
-1:
-.endm
-
-.macro over_n_8_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-    WK4     .req    STRIDE_M
-        pixld   , numbytes/4, 4, MASK, unaligned_mask
-        pixld   , numbytes, firstreg, DST, 0
-    .unreq  WK4
-.endm
-
-.macro over_n_8_8888_1pixel src, dst
-        uxtb    Y, WK4, ror #src*8
-        /* Trailing part of multiplication of source */
-        mla     SCRATCH, STRIDE_S, Y, STRIDE_D
-        mla     Y, SRC, Y, STRIDE_D
-        mov     ORIG_W, #255
-        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
-        uxtab16 Y, Y, Y, ror #8
-        mov     SCRATCH, SCRATCH, ror #8
-        sub     ORIG_W, ORIG_W, Y, lsr #24
-        sel     Y, SCRATCH, Y
-        /* Then multiply the destination */
-        mul_8888_8  WK&dst, ORIG_W, SCRATCH, STRIDE_D
-        uqadd8  WK&dst, WK&dst, Y
-.endm
-
-.macro over_n_8_8888_process_tail  cond, numbytes, firstreg
-    WK4     .req    STRIDE_M
-        teq     WK4, #0
-        beq     10f
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
-        over_n_8_8888_1pixel  %(PROCESS_REG-firstreg), %(PROCESS_REG)
-  .set PROCESS_REG, PROCESS_REG+1
- .endr
-        pixst   , numbytes, firstreg, DST
-10:
-    .unreq  WK4
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
-    2, /* prefetch distance */ \
-    over_n_8_8888_init, \
-    over_n_8_8888_newline, \
-    nop_macro, /* cleanup */ \
-    over_n_8_8888_process_head, \
-    over_n_8_8888_process_tail
-
-/******************************************************************************/
-
-.macro over_reverse_n_8888_init
-        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
-        ldr     MASK, =0x00800080
-        /* Split source pixel into RB/AG parts */
-        uxtb16  STRIDE_S, SRC
-        uxtb16  STRIDE_M, SRC, ror #8
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, MASK, MASK
-        line_saved_regs  STRIDE_D, ORIG_W
-.endm
-
-.macro over_reverse_n_8888_newline
-        mov     STRIDE_D, #0xFF
-.endm
-
-.macro over_reverse_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        pixld   , numbytes, firstreg, DST, 0
-.endm
-
-.macro over_reverse_n_8888_1pixel  d, is_only
-        teq     WK&d, #0
-        beq     8f       /* replace with source */
-        bics    ORIG_W, STRIDE_D, WK&d, lsr #24
- .if is_only == 1
-        beq     49f      /* skip store */
- .else
-        beq     9f       /* write same value back */
- .endif
-        mla     SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
-        mla     ORIG_W, STRIDE_M, ORIG_W, MASK  /* alpha/green */
-        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
-        uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
-        mov     SCRATCH, SCRATCH, ror #8
-        sel     ORIG_W, SCRATCH, ORIG_W
-        uqadd8  WK&d, WK&d, ORIG_W
-        b       9f
-8:      mov     WK&d, SRC
-9:
-.endm
-
-.macro over_reverse_n_8888_tail  numbytes, reg1, reg2, reg3, reg4
- .if numbytes == 4
-        over_reverse_n_8888_1pixel  reg1, 1
- .else
-        and     SCRATCH, WK&reg1, WK&reg2
-  .if numbytes == 16
-        and     SCRATCH, SCRATCH, WK&reg3
-        and     SCRATCH, SCRATCH, WK&reg4
-  .endif
-        mvns    SCRATCH, SCRATCH, asr #24
-        beq     49f /* skip store if all opaque */
-        over_reverse_n_8888_1pixel  reg1, 0
-        over_reverse_n_8888_1pixel  reg2, 0
-  .if numbytes == 16
-        over_reverse_n_8888_1pixel  reg3, 0
-        over_reverse_n_8888_1pixel  reg4, 0
-  .endif
- .endif
-        pixst   , numbytes, reg1, DST
-49:
-.endm
-
-.macro over_reverse_n_8888_process_tail  cond, numbytes, firstreg
-        over_reverse_n_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
-.endm
-
-generate_composite_function \
-    pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
-    3, /* prefetch distance */ \
-    over_reverse_n_8888_init, \
-    over_reverse_n_8888_newline, \
-    nop_macro, /* cleanup */ \
-    over_reverse_n_8888_process_head, \
-    over_reverse_n_8888_process_tail
-
-/******************************************************************************/
-
-.macro over_white_8888_8888_ca_init
-        HALF    .req    SRC
-        TMP0    .req    STRIDE_D
-        TMP1    .req    STRIDE_S
-        TMP2    .req    STRIDE_M
-        TMP3    .req    ORIG_W
-        WK4     .req    SCRATCH
-        line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
-        ldr     SCRATCH, =0x800080
-        mov     HALF, #0x80
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, SCRATCH, SCRATCH
-        .set DST_PRELOAD_BIAS, 8
-.endm
-
-.macro over_white_8888_8888_ca_cleanup
-        .set DST_PRELOAD_BIAS, 0
-        .unreq  HALF
-        .unreq  TMP0
-        .unreq  TMP1
-        .unreq  TMP2
-        .unreq  TMP3
-        .unreq  WK4
-.endm
-
-.macro over_white_8888_8888_ca_combine  m, d
-        uxtb16  TMP1, TMP0                /* rb_notmask */
-        uxtb16  TMP2, d                   /* rb_dest; 1 stall follows */
-        smlatt  TMP3, TMP2, TMP1, HALF    /* red */
-        smlabb  TMP2, TMP2, TMP1, HALF    /* blue */
-        uxtb16  TMP0, TMP0, ror #8        /* ag_notmask */
-        uxtb16  TMP1, d, ror #8           /* ag_dest; 1 stall follows */
-        smlatt  d, TMP1, TMP0, HALF       /* alpha */
-        smlabb  TMP1, TMP1, TMP0, HALF    /* green */
-        pkhbt   TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
-        pkhbt   TMP1, TMP1, d, lsl #16    /* ag */
-        uxtab16 TMP0, TMP0, TMP0, ror #8
-        uxtab16 TMP1, TMP1, TMP1, ror #8
-        mov     TMP0, TMP0, ror #8
-        sel     d, TMP0, TMP1
-        uqadd8  d, d, m                   /* d is a late result */
-.endm
-
-.macro over_white_8888_8888_ca_1pixel_head
-        pixld   , 4, 1, MASK, 0
-        pixld   , 4, 3, DST, 0
-.endm
-
-.macro over_white_8888_8888_ca_1pixel_tail
-        mvn     TMP0, WK1
-        teq     WK1, WK1, asr #32
-        bne     01f
-        bcc     03f
-        mov     WK3, WK1
-        b       02f
-01:     over_white_8888_8888_ca_combine WK1, WK3
-02:     pixst   , 4, 3, DST
-03:
-.endm
-
-.macro over_white_8888_8888_ca_2pixels_head
-        pixld   , 8, 1, MASK, 0
-.endm
-
-.macro over_white_8888_8888_ca_2pixels_tail
-        pixld   , 8, 3, DST
-        mvn     TMP0, WK1
-        teq     WK1, WK1, asr #32
-        bne     01f
-        movcs   WK3, WK1
-        bcs     02f
-        teq     WK2, #0
-        beq     05f
-        b       02f
-01:     over_white_8888_8888_ca_combine WK1, WK3
-02:     mvn     TMP0, WK2
-        teq     WK2, WK2, asr #32
-        bne     03f
-        movcs   WK4, WK2
-        b       04f
-03:     over_white_8888_8888_ca_combine WK2, WK4
-04:     pixst   , 8, 3, DST
-05:
-.endm
-
-.macro over_white_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .if numbytes == 4
-        over_white_8888_8888_ca_1pixel_head
- .else
-  .if numbytes == 16
-        over_white_8888_8888_ca_2pixels_head
-        over_white_8888_8888_ca_2pixels_tail
-  .endif
-        over_white_8888_8888_ca_2pixels_head
- .endif
-.endm
-
-.macro over_white_8888_8888_ca_process_tail  cond, numbytes, firstreg
- .if numbytes == 4
-        over_white_8888_8888_ca_1pixel_tail
- .else
-        over_white_8888_8888_ca_2pixels_tail
- .endif
-.endm
-
-generate_composite_function \
-    pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \
-    2, /* prefetch distance */ \
-    over_white_8888_8888_ca_init, \
-    nop_macro, /* newline */ \
-    over_white_8888_8888_ca_cleanup, \
-    over_white_8888_8888_ca_process_head, \
-    over_white_8888_8888_ca_process_tail
-
-
-.macro over_n_8888_8888_ca_init
-        /* Set up constants. RB_SRC and AG_SRC are in registers;
-         * RB_FLDS, A_SRC, and the two HALF values need to go on the
-         * stack (and the ful SRC value is already there) */
-        ldr     SCRATCH, [sp, #ARGS_STACK_OFFSET]
-        mov     WK0, #0x00FF0000
-        orr     WK0, WK0, #0xFF        /* RB_FLDS (0x00FF00FF) */
-        mov     WK1, #0x80             /* HALF default value */
-        mov     WK2, SCRATCH, lsr #24  /* A_SRC */
-        orr     WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
-        push    {WK0-WK3}
- .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16
-        uxtb16  SRC, SCRATCH
-        uxtb16  STRIDE_S, SCRATCH, ror #8
-
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, WK3, WK3
-
-        .unreq  WK0
-        .unreq  WK1
-        .unreq  WK2
-        .unreq  WK3
-        WK0     .req    Y
-        WK1     .req    STRIDE_D
-        RB_SRC  .req    SRC
-        AG_SRC  .req    STRIDE_S
-        WK2     .req    STRIDE_M
-        RB_FLDS .req    r8       /* the reloaded constants have to be at consecutive registers starting at an even one */
-        A_SRC   .req    r8
-        HALF    .req    r9
-        WK3     .req    r10
-        WK4     .req    r11
-        WK5     .req    SCRATCH
-        WK6     .req    ORIG_W
-
-        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
-.endm
-
-.macro over_n_8888_8888_ca_cleanup
-        add     sp, sp, #16
- .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16
-
-        .unreq  WK0
-        .unreq  WK1
-        .unreq  RB_SRC
-        .unreq  AG_SRC
-        .unreq  WK2
-        .unreq  RB_FLDS
-        .unreq  A_SRC
-        .unreq  HALF
-        .unreq  WK3
-        .unreq  WK4
-        .unreq  WK5
-        .unreq  WK6
-        WK0     .req    r8
-        WK1     .req    r9
-        WK2     .req    r10
-        WK3     .req    r11
-.endm
-
-.macro over_n_8888_8888_ca_1pixel_head
-        pixld   , 4, 6, MASK, 0
-        pixld   , 4, 0, DST, 0
-.endm
-
-.macro over_n_8888_8888_ca_1pixel_tail
-        ldrd    A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8]
-        uxtb16  WK1, WK6                 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
-        teq     WK6, WK6, asr #32        /* Zc if transparent, ZC if opaque */
-        bne     20f
-        bcc     40f
-        /* Mask is fully opaque (all channels) */
-        ldr     WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
-        eors    A_SRC, A_SRC, #0xFF
-        bne     10f
-        /* Source is also opaque - same as src_8888_8888 */
-        mov     WK0, WK6
-        b       30f
-10:     /* Same as over_8888_8888 */
-        mul_8888_8 WK0, A_SRC, WK5, HALF
-        uqadd8  WK0, WK0, WK6
-        b       30f
-20:     /* No simplifications possible - do it the hard way */
-        uxtb16  WK2, WK6, ror #8         /* ag_mask */
-        mla     WK3, WK1, A_SRC, HALF    /* rb_mul; 2 cycles */
-        mla     WK4, WK2, A_SRC, HALF    /* ag_mul; 2 cycles */
-        ldrd    RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET]
-        uxtb16  WK5, WK0                 /* rb_dest */
-        uxtab16 WK3, WK3, WK3, ror #8
-        uxtb16  WK6, WK0, ror #8         /* ag_dest */
-        uxtab16 WK4, WK4, WK4, ror #8
-        smlatt  WK0, RB_SRC, WK1, HALF   /* red1 */
-        smlabb  WK1, RB_SRC, WK1, HALF   /* blue1 */
-        bic     WK3, RB_FLDS, WK3, lsr #8
-        bic     WK4, RB_FLDS, WK4, lsr #8
-        pkhbt   WK1, WK1, WK0, lsl #16   /* rb1 */
-        smlatt  WK0, WK5, WK3, HALF      /* red2 */
-        smlabb  WK3, WK5, WK3, HALF      /* blue2 */
-        uxtab16 WK1, WK1, WK1, ror #8
-        smlatt  WK5, AG_SRC, WK2, HALF   /* alpha1 */
-        pkhbt   WK3, WK3, WK0, lsl #16   /* rb2 */
-        smlabb  WK0, AG_SRC, WK2, HALF   /* green1 */
-        smlatt  WK2, WK6, WK4, HALF      /* alpha2 */
-        smlabb  WK4, WK6, WK4, HALF      /* green2 */
-        pkhbt   WK0, WK0, WK5, lsl #16   /* ag1 */
-        uxtab16 WK3, WK3, WK3, ror #8
-        pkhbt   WK4, WK4, WK2, lsl #16   /* ag2 */
-        uxtab16 WK0, WK0, WK0, ror #8
-        uxtab16 WK4, WK4, WK4, ror #8
-        mov     WK1, WK1, ror #8
-        mov     WK3, WK3, ror #8
-        sel     WK2, WK1, WK0            /* recombine source*mask */
-        sel     WK1, WK3, WK4            /* recombine dest*(1-source_alpha*mask) */
-        uqadd8  WK0, WK1, WK2            /* followed by 1 stall */
-30:     /* The destination buffer is already in the L1 cache, so
-         * there's little point in amalgamating writes */
-        pixst   , 4, 0, DST
-40:
-.endm
-
-.macro over_n_8888_8888_ca_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .rept (numbytes / 4) - 1
-        over_n_8888_8888_ca_1pixel_head
-        over_n_8888_8888_ca_1pixel_tail
- .endr
-        over_n_8888_8888_ca_1pixel_head
-.endm
-
-.macro over_n_8888_8888_ca_process_tail  cond, numbytes, firstreg
-        over_n_8888_8888_ca_1pixel_tail
-.endm
-
-pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
-        ldr     ip, [sp]
-        cmp     ip, #-1
-        beq     pixman_composite_over_white_8888_8888_ca_asm_armv6
-        /* else drop through... */
- .endfunc
-generate_composite_function \
-    pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \
-    2, /* prefetch distance */ \
-    over_n_8888_8888_ca_init, \
-    nop_macro, /* newline */ \
-    over_n_8888_8888_ca_cleanup, \
-    over_n_8888_8888_ca_process_head, \
-    over_n_8888_8888_ca_process_tail
-
-/******************************************************************************/
-
-.macro in_reverse_8888_8888_init
-        /* Hold loop invariant in MASK */
-        ldr     MASK, =0x00800080
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, MASK, MASK
-        /* Offset the source pointer: we only need the alpha bytes */
-        add     SRC, SRC, #3
-        line_saved_regs  ORIG_W
-.endm
-
-.macro in_reverse_8888_8888_head  numbytes, reg1, reg2, reg3
-        ldrb    ORIG_W, [SRC], #4
- .if numbytes >= 8
-        ldrb    WK&reg1, [SRC], #4
-  .if numbytes == 16
-        ldrb    WK&reg2, [SRC], #4
-        ldrb    WK&reg3, [SRC], #4
-  .endif
- .endif
-        add     DST, DST, #numbytes
-.endm
-
-.macro in_reverse_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        in_reverse_8888_8888_head  numbytes, firstreg, %(firstreg+1), %(firstreg+2)
-.endm
-
-.macro in_reverse_8888_8888_1pixel  s, d, offset, is_only
- .if is_only != 1
-        movs    s, ORIG_W
-  .if offset != 0
-        ldrb    ORIG_W, [SRC, #offset]
-  .endif
-        beq     01f
-        teq     STRIDE_M, #0xFF
-        beq     02f
- .endif
-        uxtb16  SCRATCH, d                 /* rb_dest */
-        uxtb16  d, d, ror #8               /* ag_dest */
-        mla     SCRATCH, SCRATCH, s, MASK
-        mla     d, d, s, MASK
-        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
-        uxtab16 d, d, d, ror #8
-        mov     SCRATCH, SCRATCH, ror #8
-        sel     d, SCRATCH, d
-        b       02f
- .if offset == 0
-48:     /* Last mov d,#0 of the set - used as part of shortcut for
-         * source values all 0 */
- .endif
-01:     mov     d, #0
-02:
-.endm
-
-.macro in_reverse_8888_8888_tail  numbytes, reg1, reg2, reg3, reg4
- .if numbytes == 4
-        teq     ORIG_W, ORIG_W, asr #32
-        ldrne   WK&reg1, [DST, #-4]
- .elseif numbytes == 8
-        teq     ORIG_W, WK&reg1
-        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
-        ldmnedb DST, {WK&reg1-WK&reg2}
- .else
-        teq     ORIG_W, WK&reg1
-        teqeq   ORIG_W, WK&reg2
-        teqeq   ORIG_W, WK&reg3
-        teqeq   ORIG_W, ORIG_W, asr #32  /* all 0 or all -1? */
-        ldmnedb DST, {WK&reg1-WK&reg4}
- .endif
-        cmnne   DST, #0   /* clear C if NE */
-        bcs     49f       /* no writes to dest if source all -1 */
-        beq     48f       /* set dest to all 0 if source all 0 */
- .if numbytes == 4
-        in_reverse_8888_8888_1pixel  ORIG_W, WK&reg1, 0, 1
-        str     WK&reg1, [DST, #-4]
- .elseif numbytes == 8
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -4, 0
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, 0, 0
-        stmdb   DST, {WK&reg1-WK&reg2}
- .else
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg1, -12, 0
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg2, -8, 0
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg3, -4, 0
-        in_reverse_8888_8888_1pixel  STRIDE_M, WK&reg4, 0, 0
-        stmdb   DST, {WK&reg1-WK&reg4}
- .endif
-49:
-.endm
-
-.macro in_reverse_8888_8888_process_tail  cond, numbytes, firstreg
-        in_reverse_8888_8888_tail  numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
-.endm
-
-generate_composite_function \
-    pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \
-    2, /* prefetch distance */ \
-    in_reverse_8888_8888_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    in_reverse_8888_8888_process_head, \
-    in_reverse_8888_8888_process_tail
-
-/******************************************************************************/
-
-.macro over_n_8888_init
-        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
-        /* Hold loop invariant in MASK */
-        ldr     MASK, =0x00800080
-        /* Hold multiplier for destination in STRIDE_M */
-        mov     STRIDE_M, #255
-        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
-        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
-        uadd8   SCRATCH, MASK, MASK
-.endm
-
-.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
-        pixld   , numbytes, firstreg, DST, 0
-.endm
-
-.macro over_n_8888_1pixel dst
-        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
-        uqadd8  WK&dst, WK&dst, SRC
-.endm
-
-.macro over_n_8888_process_tail  cond, numbytes, firstreg
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
-        over_n_8888_1pixel %(PROCESS_REG)
-  .set PROCESS_REG, PROCESS_REG+1
- .endr
-        pixst   , numbytes, firstreg, DST
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
-    2, /* prefetch distance */ \
-    over_n_8888_init, \
-    nop_macro, /* newline */ \
-    nop_macro, /* cleanup */ \
-    over_n_8888_process_head, \
-    over_n_8888_process_tail
-
-/******************************************************************************/
diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm.h b/vendor/pixman/pixman/pixman-arm-simd-asm.h
deleted file mode 100644
index da153c3f5..000000000
--- a/vendor/pixman/pixman/pixman-arm-simd-asm.h
+++ /dev/null
@@ -1,966 +0,0 @@
-/*
- * Copyright © 2012 Raspberry Pi Foundation
- * Copyright © 2012 RISC OS Open Ltd
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of the copyright holders not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  The copyright holders make no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Ben Avison (bavison@riscosopen.org)
- *
- */
-
-/*
- * Because the alignment of pixel data to cachelines, and even the number of
- * cachelines per row can vary from row to row, and because of the need to
- * preload each scanline once and only once, this prefetch strategy treats
- * each row of pixels independently. When a pixel row is long enough, there
- * are three distinct phases of prefetch:
- * * an inner loop section, where each time a cacheline of data is
- *    processed, another cacheline is preloaded (the exact distance ahead is
- *    determined empirically using profiling results from lowlevel-blt-bench)
- * * a leading section, where enough cachelines are preloaded to ensure no
- *    cachelines escape being preloaded when the inner loop starts
- * * a trailing section, where a limited number (0 or more) of cachelines
- *    are preloaded to deal with data (if any) that hangs off the end of the
- *    last iteration of the inner loop, plus any trailing bytes that were not
- *    enough to make up one whole iteration of the inner loop
- * 
- * There are (in general) three distinct code paths, selected between
- * depending upon how long the pixel row is. If it is long enough that there
- * is at least one iteration of the inner loop (as described above) then
- * this is described as the "wide" case. If it is shorter than that, but
- * there are still enough bytes output that there is at least one 16-byte-
- * long, 16-byte-aligned write to the destination (the optimum type of
- * write), then this is the "medium" case. If it is not even this long, then
- * this is the "narrow" case, and there is no attempt to align writes to
- * 16-byte boundaries. In the "medium" and "narrow" cases, all the
- * cachelines containing data from the pixel row are prefetched up-front.
- */
-
-/*
- * Determine whether we put the arguments on the stack for debugging.
- */
-#undef DEBUG_PARAMS
-
-/*
- * Bit flags for 'generate_composite_function' macro which are used
- * to tune generated functions behavior.
- */
-.set FLAG_DST_WRITEONLY,         0
-.set FLAG_DST_READWRITE,         1
-.set FLAG_COND_EXEC,             0
-.set FLAG_BRANCH_OVER,           2
-.set FLAG_PROCESS_PRESERVES_PSR, 0
-.set FLAG_PROCESS_CORRUPTS_PSR,  4
-.set FLAG_PROCESS_DOESNT_STORE,  0
-.set FLAG_PROCESS_DOES_STORE,    8 /* usually because it needs to conditionally skip it */
-.set FLAG_NO_SPILL_LINE_VARS,        0
-.set FLAG_SPILL_LINE_VARS_WIDE,      16
-.set FLAG_SPILL_LINE_VARS_NON_WIDE,  32
-.set FLAG_SPILL_LINE_VARS,           48
-.set FLAG_PROCESS_CORRUPTS_SCRATCH,  0
-.set FLAG_PROCESS_PRESERVES_SCRATCH, 64
-.set FLAG_PROCESS_PRESERVES_WK0,     0
-.set FLAG_PROCESS_CORRUPTS_WK0,      128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
-.set FLAG_PRELOAD_DST,               0
-.set FLAG_NO_PRELOAD_DST,            256
-
-/*
- * Number of bytes by which to adjust preload offset of destination
- * buffer (allows preload instruction to be moved before the load(s))
- */
-.set DST_PRELOAD_BIAS, 0
-
-/*
- * Offset into stack where mask and source pointer/stride can be accessed.
- */
-#ifdef DEBUG_PARAMS
-.set ARGS_STACK_OFFSET,        (9*4+9*4)
-#else
-.set ARGS_STACK_OFFSET,        (9*4)
-#endif
-
-/*
- * Offset into stack where space allocated during init macro can be accessed.
- */
-.set LOCALS_STACK_OFFSET,     0
-
-/*
- * Constants for selecting preferable prefetch type.
- */
-.set PREFETCH_TYPE_NONE,       0
-.set PREFETCH_TYPE_STANDARD,   1
-
-/*
- * Definitions of macros for load/store of pixel data.
- */
-
-.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0
- .if numbytes == 16
-  .if unaligned == 1
-        op&r&cond    WK&reg0, [base], #4
-        op&r&cond    WK&reg1, [base], #4
-        op&r&cond    WK&reg2, [base], #4
-        op&r&cond    WK&reg3, [base], #4
-  .else
-        op&m&cond&ia base!, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
-  .endif
- .elseif numbytes == 8
-  .if unaligned == 1
-        op&r&cond    WK&reg0, [base], #4
-        op&r&cond    WK&reg1, [base], #4
-  .else
-        op&m&cond&ia base!, {WK&reg0,WK&reg1}
-  .endif
- .elseif numbytes == 4
-        op&r&cond    WK&reg0, [base], #4
- .elseif numbytes == 2
-        op&r&cond&h  WK&reg0, [base], #2
- .elseif numbytes == 1
-        op&r&cond&b  WK&reg0, [base], #1
- .else
-  .error "unsupported size: numbytes"
- .endif
-.endm
-
-.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base
- .if numbytes == 16
-        stm&cond&db base, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
- .elseif numbytes == 8
-        stm&cond&db base, {WK&reg0,WK&reg1}
- .elseif numbytes == 4
-        str&cond    WK&reg0, [base, #-4]
- .elseif numbytes == 2
-        str&cond&h  WK&reg0, [base, #-2]
- .elseif numbytes == 1
-        str&cond&b  WK&reg0, [base, #-1]
- .else
-  .error "unsupported size: numbytes"
- .endif
-.endm
-
-.macro pixld cond, numbytes, firstreg, base, unaligned
-        pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned
-.endm
-
-.macro pixst cond, numbytes, firstreg, base
- .if (flags) & FLAG_DST_READWRITE
-        pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
- .else
-        pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
- .endif
-.endm
-
-.macro PF a, x:vararg
- .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD)
-        a x
- .endif
-.endm
-
-
-.macro preload_leading_step1  bpp, ptr, base
-/* If the destination is already 16-byte aligned, then we need to preload
- * between 0 and prefetch_distance (inclusive) cache lines ahead so there
- * are no gaps when the inner loop starts.
- */
- .if bpp > 0
-        PF  bic,    ptr, base, #31
-  .set OFFSET, 0
-  .rept prefetch_distance+1
-        PF  pld,    [ptr, #OFFSET]
-   .set OFFSET, OFFSET+32
-  .endr
- .endif
-.endm
-
-.macro preload_leading_step2  bpp, bpp_shift, ptr, base
-/* However, if the destination is not 16-byte aligned, we may need to
- * preload more cache lines than that. The question we need to ask is:
- * are the bytes corresponding to the leading pixels more than the amount
- * by which the source pointer will be rounded down for preloading, and if
- * so, by how many cache lines? Effectively, we want to calculate
- *     leading_bytes = ((-dst)&15)*src_bpp/dst_bpp
- *     inner_loop_offset = (src+leading_bytes)&31
- *     extra_needed = leading_bytes - inner_loop_offset
- * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only
- * possible when there are 4 src bytes for every 1 dst byte).
- */
- .if bpp > 0
-  .ifc base,DST
-        /* The test can be simplified further when preloading the destination */
-        PF  tst,    base, #16
-        PF  beq,    61f
-  .else
-   .if bpp/dst_w_bpp == 4
-        PF  add,    SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift
-        PF  and,    SCRATCH, SCRATCH, #31
-        PF  rsb,    SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift
-        PF  sub,    SCRATCH, SCRATCH, #1        /* so now ranges are -16..-1 / 0..31 / 32..63 */
-        PF  movs,   SCRATCH, SCRATCH, lsl #32-6 /* so this sets         NC   /  nc   /   Nc   */
-        PF  bcs,    61f
-        PF  bpl,    60f
-        PF  pld,    [ptr, #32*(prefetch_distance+2)]
-   .else
-        PF  mov,    SCRATCH, base, lsl #32-5
-        PF  add,    SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift
-        PF  rsbs,   SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift
-        PF  bls,    61f
-   .endif
-  .endif
-60:     PF  pld,    [ptr, #32*(prefetch_distance+1)]
-61:
- .endif
-.endm
-
-#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2))
-.macro preload_middle   bpp, base, scratch_holds_offset
- .if bpp > 0
-        /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */
-  .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp)
-   .if scratch_holds_offset
-        PF  pld,    [base, SCRATCH]
-   .else
-        PF  bic,    SCRATCH, base, #31
-        PF  pld,    [SCRATCH, #32*prefetch_distance]
-   .endif
-  .endif
- .endif
-.endm
-
-.macro preload_trailing  bpp, bpp_shift, base
- .if bpp > 0
-  .if bpp*pix_per_block > 256
-        /* Calculations are more complex if more than one fetch per block */
-        PF  and,    WK1, base, #31
-        PF  add,    WK1, WK1, WK0, lsl #bpp_shift
-        PF  add,    WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1)
-        PF  bic,    SCRATCH, base, #31
-80:     PF  pld,    [SCRATCH, #32*(prefetch_distance+1)]
-        PF  add,    SCRATCH, SCRATCH, #32
-        PF  subs,   WK1, WK1, #32
-        PF  bhi,    80b
-  .else
-        /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */
-        PF  mov,    SCRATCH, base, lsl #32-5
-        PF  adds,   SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift
-        PF  adceqs, SCRATCH, SCRATCH, #0
-        /* The instruction above has two effects: ensures Z is only
-         * set if C was clear (so Z indicates that both shifted quantities
-         * were 0), and clears C if Z was set (so C indicates that the sum
-         * of the shifted quantities was greater and not equal to 32) */
-        PF  beq,    82f
-        PF  bic,    SCRATCH, base, #31
-        PF  bcc,    81f
-        PF  pld,    [SCRATCH, #32*(prefetch_distance+2)]
-81:     PF  pld,    [SCRATCH, #32*(prefetch_distance+1)]
-82:
-  .endif
- .endif
-.endm
-
-
-.macro preload_line    narrow_case, bpp, bpp_shift, base
-/* "narrow_case" - just means that the macro was invoked from the "narrow"
- *    code path rather than the "medium" one - because in the narrow case,
- *    the row of pixels is known to output no more than 30 bytes, then
- *    (assuming the source pixels are no wider than the the destination
- *    pixels) they cannot possibly straddle more than 2 32-byte cachelines,
- *    meaning there's no need for a loop.
- * "bpp" - number of bits per pixel in the channel (source, mask or
- *    destination) that's being preloaded, or 0 if this channel is not used
- *    for reading
- * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course)
- * "base" - base address register of channel to preload (SRC, MASK or DST)
- */
- .if bpp > 0
-  .if narrow_case && (bpp <= dst_w_bpp)
-        /* In these cases, each line for each channel is in either 1 or 2 cache lines */
-        PF  bic,    WK0, base, #31
-        PF  pld,    [WK0]
-        PF  add,    WK1, base, X, LSL #bpp_shift
-        PF  sub,    WK1, WK1, #1
-        PF  bic,    WK1, WK1, #31
-        PF  cmp,    WK1, WK0
-        PF  beq,    90f
-        PF  pld,    [WK1]
-90:
-  .else
-        PF  bic,    WK0, base, #31
-        PF  pld,    [WK0]
-        PF  add,    WK1, base, X, lsl #bpp_shift
-        PF  sub,    WK1, WK1, #1
-        PF  bic,    WK1, WK1, #31
-        PF  cmp,    WK1, WK0
-        PF  beq,    92f
-91:     PF  add,    WK0, WK0, #32
-        PF  cmp,    WK0, WK1
-        PF  pld,    [WK0]
-        PF  bne,    91b
-92:
-  .endif
- .endif
-.endm
-
-
-.macro conditional_process1_helper  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
-        process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0
- .if decrementx
-        sub&cond X, X, #8*numbytes/dst_w_bpp
- .endif
-        process_tail  cond, numbytes, firstreg
- .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   cond, numbytes, firstreg, DST
- .endif
-.endm
-
-.macro conditional_process1  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
- .if (flags) & FLAG_BRANCH_OVER
-  .ifc cond,mi
-        bpl     100f
-  .endif
-  .ifc cond,cs
-        bcc     100f
-  .endif
-  .ifc cond,ne
-        beq     100f
-  .endif
-        conditional_process1_helper  , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
-100:
- .else
-        conditional_process1_helper  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
- .endif
-.endm
-
-.macro conditional_process2  test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx
- .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE)
-        /* Can't interleave reads and writes */
-        test
-        conditional_process1  cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx
-  .if (flags) & FLAG_PROCESS_CORRUPTS_PSR
-        test
-  .endif
-        conditional_process1  cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx
- .else
-        /* Can interleave reads and writes for better scheduling */
-        test
-        process_head  cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0
-        process_head  cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0
-  .if decrementx
-        sub&cond1 X, X, #8*numbytes1/dst_w_bpp
-        sub&cond2 X, X, #8*numbytes2/dst_w_bpp
-  .endif
-        process_tail  cond1, numbytes1, firstreg1
-        process_tail  cond2, numbytes2, firstreg2
-        pixst   cond1, numbytes1, firstreg1, DST
-        pixst   cond2, numbytes2, firstreg2, DST
- .endif
-.endm
-
-
-.macro test_bits_1_0_ptr
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-        movs    SCRATCH, X, lsl #32-1  /* C,N = bits 1,0 of DST */
- .else
-        movs    SCRATCH, WK0, lsl #32-1  /* C,N = bits 1,0 of DST */
- .endif
-.endm
-
-.macro test_bits_3_2_ptr
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-        movs    SCRATCH, X, lsl #32-3  /* C,N = bits 3, 2 of DST */
- .else
-        movs    SCRATCH, WK0, lsl #32-3  /* C,N = bits 3, 2 of DST */
- .endif
-.endm
-
-.macro leading_15bytes  process_head, process_tail
-        /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
- .set DECREMENT_X, 1
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-  .set DECREMENT_X, 0
-        sub     X, X, WK0, lsr #dst_bpp_shift
-        str     X, [sp, #LINE_SAVED_REG_COUNT*4]
-        mov     X, WK0
- .endif
-        /* Use unaligned loads in all cases for simplicity */
- .if dst_w_bpp == 8
-        conditional_process2  test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X
- .elseif dst_w_bpp == 16
-        test_bits_1_0_ptr
-        conditional_process1  cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X
- .endif
-        conditional_process2  test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-        ldr     X, [sp, #LINE_SAVED_REG_COUNT*4]
- .endif
-.endm
-
-.macro test_bits_3_2_pix
-        movs    SCRATCH, X, lsl #dst_bpp_shift+32-3
-.endm
-
-.macro test_bits_1_0_pix
- .if dst_w_bpp == 8
-        movs    SCRATCH, X, lsl #dst_bpp_shift+32-1
- .else
-        movs    SCRATCH, X, lsr #1
- .endif
-.endm
-
-.macro trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
-        conditional_process2  test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0
- .if dst_w_bpp == 16
-        test_bits_1_0_pix
-        conditional_process1  cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0
- .elseif dst_w_bpp == 8
-        conditional_process2  test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0
- .endif
-.endm
-
-
-.macro wide_case_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
-110:
- .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */
- .rept pix_per_block*dst_w_bpp/128
-        process_head  , 16, 0, unaligned_src, unaligned_mask, 1
-  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
-        preload_middle  src_bpp, SRC, 1
-  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
-        preload_middle  mask_bpp, MASK, 1
-  .else
-        preload_middle  src_bpp, SRC, 0
-        preload_middle  mask_bpp, MASK, 0
-  .endif
-  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
-        /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
-         * destination prefetches are 32-byte aligned. It's also the easiest channel to offset
-         * preloads for, to achieve staggered prefetches for multiple channels, because there are
-         * always two STMs per prefetch, so there is always an opposite STM on which to put the
-         * preload. Note, no need to BIC the base register here */
-        PF  pld,    [DST, #32*prefetch_distance - dst_alignment]
-  .endif
-        process_tail  , 16, 0
-  .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   , 16, 0, DST
-  .endif
-  .set SUBBLOCK, SUBBLOCK+1
- .endr
-        subs    X, X, #pix_per_block
-        bhs     110b
-.endm
-
-.macro wide_case_inner_loop_and_trailing_pixels  process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask
-        /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */
- .if dst_r_bpp > 0
-        tst     DST, #16
-        bne     111f
-        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS
-        b       112f
-111:
- .endif
-        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS
-112:
-        /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
- .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
-        PF  and,    WK0, X, #pix_per_block-1
- .endif
-        preload_trailing  src_bpp, src_bpp_shift, SRC
-        preload_trailing  mask_bpp, mask_bpp_shift, MASK
- .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
-        preload_trailing  dst_r_bpp, dst_bpp_shift, DST
- .endif
-        add     X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
-        /* The remainder of the line is handled identically to the medium case */
-        medium_case_inner_loop_and_trailing_pixels  process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
-.endm
-
-.macro medium_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
-120:
-        process_head  , 16, 0, unaligned_src, unaligned_mask, 0
-        process_tail  , 16, 0
- .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   , 16, 0, DST
- .endif
-        subs    X, X, #128/dst_w_bpp
-        bhs     120b
-        /* Trailing pixels */
-        tst     X, #128/dst_w_bpp - 1
-        beq     exit_label
-        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
-.endm
-
-.macro narrow_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
-        tst     X, #16*8/dst_w_bpp
-        conditional_process1  ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0
-        /* Trailing pixels */
-        /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */
-        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
-.endm
-
-.macro switch_on_alignment  action, process_head, process_tail, process_inner_loop, exit_label
- /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */
- .if mask_bpp == 8 || mask_bpp == 16
-        tst     MASK, #3
-        bne     141f
- .endif
-  .if src_bpp == 8 || src_bpp == 16
-        tst     SRC, #3
-        bne     140f
-  .endif
-        action  process_head, process_tail, process_inner_loop, exit_label, 0, 0
-  .if src_bpp == 8 || src_bpp == 16
-        b       exit_label
-140:
-        action  process_head, process_tail, process_inner_loop, exit_label, 1, 0
-  .endif
- .if mask_bpp == 8 || mask_bpp == 16
-        b       exit_label
-141:
-  .if src_bpp == 8 || src_bpp == 16
-        tst     SRC, #3
-        bne     142f
-  .endif
-        action  process_head, process_tail, process_inner_loop, exit_label, 0, 1
-  .if src_bpp == 8 || src_bpp == 16
-        b       exit_label
-142:
-        action  process_head, process_tail, process_inner_loop, exit_label, 1, 1
-  .endif
- .endif
-.endm
-
-
-.macro end_of_line      restore_x, vars_spilled, loop_label, last_one
- .if vars_spilled
-        /* Sadly, GAS doesn't seem have an equivalent of the DCI directive? */
-        /* This is ldmia sp,{} */
-        .word   0xE89D0000 | LINE_SAVED_REGS
- .endif
-        subs    Y, Y, #1
- .if vars_spilled
-  .if (LINE_SAVED_REGS) & (1<<1)
-        str     Y, [sp]
-  .endif
- .endif
-        add     DST, DST, STRIDE_D
- .if src_bpp > 0
-        add     SRC, SRC, STRIDE_S
- .endif
- .if mask_bpp > 0
-        add     MASK, MASK, STRIDE_M
- .endif
- .if restore_x
-        mov     X, ORIG_W
- .endif
-        bhs     loop_label
- .ifc "last_one",""
-  .if vars_spilled
-        b       197f
-  .else
-        b       198f
-  .endif
- .else
-  .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS)
-        b       198f
-  .endif
- .endif
-.endm
-
-
-.macro generate_composite_function fname, \
-                                   src_bpp_, \
-                                   mask_bpp_, \
-                                   dst_w_bpp_, \
-                                   flags_, \
-                                   prefetch_distance_, \
-                                   init, \
-                                   newline, \
-                                   cleanup, \
-                                   process_head, \
-                                   process_tail, \
-                                   process_inner_loop
-
-    pixman_asm_function fname
-
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
- .set src_bpp, src_bpp_
- .set mask_bpp, mask_bpp_
- .set dst_w_bpp, dst_w_bpp_
- .set flags, flags_
- .set prefetch_distance, prefetch_distance_
-
-/*
- * Select prefetch type for this function.
- */
- .if prefetch_distance == 0
-  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
- .else
-  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD
- .endif
-
- .if src_bpp == 32
-  .set src_bpp_shift, 2
- .elseif src_bpp == 24
-  .set src_bpp_shift, 0
- .elseif src_bpp == 16
-  .set src_bpp_shift, 1
- .elseif src_bpp == 8
-  .set src_bpp_shift, 0
- .elseif src_bpp == 0
-  .set src_bpp_shift, -1
- .else
-  .error "requested src bpp (src_bpp) is not supported"
- .endif
-
- .if mask_bpp == 32
-  .set mask_bpp_shift, 2
- .elseif mask_bpp == 24
-  .set mask_bpp_shift, 0
- .elseif mask_bpp == 8
-  .set mask_bpp_shift, 0
- .elseif mask_bpp == 0
-  .set mask_bpp_shift, -1
- .else
-  .error "requested mask bpp (mask_bpp) is not supported"
- .endif
-
- .if dst_w_bpp == 32
-  .set dst_bpp_shift, 2
- .elseif dst_w_bpp == 24
-  .set dst_bpp_shift, 0
- .elseif dst_w_bpp == 16
-  .set dst_bpp_shift, 1
- .elseif dst_w_bpp == 8
-  .set dst_bpp_shift, 0
- .else
-  .error "requested dst bpp (dst_w_bpp) is not supported"
- .endif
-
- .if (((flags) & FLAG_DST_READWRITE) != 0)
-  .set dst_r_bpp, dst_w_bpp
- .else
-  .set dst_r_bpp, 0
- .endif
-
- .set pix_per_block, 16*8/dst_w_bpp
- .if src_bpp != 0
-  .if 32*8/src_bpp > pix_per_block
-   .set pix_per_block, 32*8/src_bpp
-  .endif
- .endif
- .if mask_bpp != 0
-  .if 32*8/mask_bpp > pix_per_block
-   .set pix_per_block, 32*8/mask_bpp
-  .endif
- .endif
- .if dst_r_bpp != 0
-  .if 32*8/dst_r_bpp > pix_per_block
-   .set pix_per_block, 32*8/dst_r_bpp
-  .endif
- .endif
-
-/* The standard entry conditions set up by pixman-arm-common.h are:
- * r0 = width (pixels)
- * r1 = height (rows)
- * r2 = pointer to top-left pixel of destination
- * r3 = destination stride (pixels)
- * [sp] = source pixel value, or pointer to top-left pixel of source
- * [sp,#4] = 0 or source stride (pixels)
- * The following arguments are unused for non-mask operations
- * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask
- * [sp,#12] = 0 or mask stride (pixels)
- */
-
-/*
- * Assign symbolic names to registers
- */
-    X           .req    r0  /* pixels to go on this line */
-    Y           .req    r1  /* lines to go */
-    DST         .req    r2  /* destination pixel pointer */
-    STRIDE_D    .req    r3  /* destination stride (bytes, minus width) */
-    SRC         .req    r4  /* source pixel pointer */
-    STRIDE_S    .req    r5  /* source stride (bytes, minus width) */
-    MASK        .req    r6  /* mask pixel pointer (if applicable) */
-    STRIDE_M    .req    r7  /* mask stride (bytes, minus width) */
-    WK0         .req    r8  /* pixel data registers */
-    WK1         .req    r9
-    WK2         .req    r10
-    WK3         .req    r11
-    SCRATCH     .req    r12
-    ORIG_W      .req    r14 /* width (pixels) */
-
-        push    {r4-r11, lr}        /* save all registers */
-
-        subs    Y, Y, #1
-        blo     199f
-
-#ifdef DEBUG_PARAMS
-        sub     sp, sp, #9*4
-#endif
-
- .if src_bpp > 0
-        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
-        ldr     STRIDE_S, [sp, #ARGS_STACK_OFFSET+4]
- .endif
- .if mask_bpp > 0
-        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8]
-        ldr     STRIDE_M, [sp, #ARGS_STACK_OFFSET+12]
- .endif
-        
-#ifdef DEBUG_PARAMS
-        add     Y, Y, #1
-        stmia   sp, {r0-r7,pc}
-        sub     Y, Y, #1
-#endif
-
-        init
-
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-        /* Reserve a word in which to store X during leading pixels */
-        sub     sp, sp, #4
-  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4
-  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4
- .endif
-        
-        lsl     STRIDE_D, #dst_bpp_shift /* stride in bytes */
-        sub     STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
- .if src_bpp > 0
-        lsl     STRIDE_S, #src_bpp_shift
-        sub     STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift
- .endif
- .if mask_bpp > 0
-        lsl     STRIDE_M, #mask_bpp_shift
-        sub     STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift
- .endif
- 
-        /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */
-        cmp     X, #2*16*8/dst_w_bpp - 1
-        blo     170f
- .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */
-        /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */
-        cmp     X, #(prefetch_distance+3)*pix_per_block - 1
-        blo     160f
-
-        /* Wide case */
-        /* Adjust X so that the decrement instruction can also test for
-         * inner loop termination. We want it to stop when there are
-         * (prefetch_distance+1) complete blocks to go. */
-        sub     X, X, #(prefetch_distance+2)*pix_per_block
-        mov     ORIG_W, X
-  .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
-        /* This is stmdb sp!,{} */
-        .word   0xE92D0000 | LINE_SAVED_REGS
-   .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
-   .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
-  .endif
-151:    /* New line */
-        newline
-        preload_leading_step1  src_bpp, WK1, SRC
-        preload_leading_step1  mask_bpp, WK2, MASK
-  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
-        preload_leading_step1  dst_r_bpp, WK3, DST
-  .endif
-        
-        ands    WK0, DST, #15
-        beq     154f
-        rsb     WK0, WK0, #16 /* number of leading bytes until destination aligned */
-
-        preload_leading_step2  src_bpp, src_bpp_shift, WK1, SRC
-        preload_leading_step2  mask_bpp, mask_bpp_shift, WK2, MASK
-  .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
-        preload_leading_step2  dst_r_bpp, dst_bpp_shift, WK3, DST
-  .endif
-
-        leading_15bytes  process_head, process_tail
-        
-154:    /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
-  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
-        and     SCRATCH, SRC, #31
-        rsb     SCRATCH, SCRATCH, #32*prefetch_distance
-  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
-        and     SCRATCH, MASK, #31
-        rsb     SCRATCH, SCRATCH, #32*prefetch_distance
-  .endif
-  .ifc "process_inner_loop",""
-        switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
-  .else
-        switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
-  .endif
-
-157:    /* Check for another line */
-        end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b
-  .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
-   .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
-   .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
-  .endif
- .endif
-
- .ltorg
-
-160:    /* Medium case */
-        mov     ORIG_W, X
- .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
-        /* This is stmdb sp!,{} */
-        .word   0xE92D0000 | LINE_SAVED_REGS
-  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
-  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
- .endif
-161:    /* New line */
-        newline
-        preload_line 0, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
-        preload_line 0, mask_bpp, mask_bpp_shift, MASK
- .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
-        preload_line 0, dst_r_bpp, dst_bpp_shift, DST
- .endif
-        
-        sub     X, X, #128/dst_w_bpp     /* simplifies inner loop termination */
-        ands    WK0, DST, #15
-        beq     164f
-        rsb     WK0, WK0, #16 /* number of leading bytes until destination aligned */
-        
-        leading_15bytes  process_head, process_tail
-        
-164:    /* Destination now 16-byte aligned; we have at least one 16-byte output block */
-        switch_on_alignment  medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f
-        
-167:    /* Check for another line */
-        end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b
-
- .ltorg
-
-170:    /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */
- .if dst_w_bpp < 32
-        mov     ORIG_W, X
- .endif
- .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
-        /* This is stmdb sp!,{} */
-        .word   0xE92D0000 | LINE_SAVED_REGS
- .endif
-171:    /* New line */
-        newline
-        preload_line 1, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
-        preload_line 1, mask_bpp, mask_bpp_shift, MASK
- .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
-        preload_line 1, dst_r_bpp, dst_bpp_shift, DST
- .endif
-        
- .if dst_w_bpp == 8
-        tst     DST, #3
-        beq     174f
-172:    subs    X, X, #1
-        blo     177f
-        process_head  , 1, 0, 1, 1, 0
-        process_tail  , 1, 0
-  .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   , 1, 0, DST
-  .endif
-        tst     DST, #3
-        bne     172b
- .elseif dst_w_bpp == 16
-        tst     DST, #2
-        beq     174f
-        subs    X, X, #1
-        blo     177f
-        process_head  , 2, 0, 1, 1, 0
-        process_tail  , 2, 0
-  .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   , 2, 0, DST
-  .endif
- .endif
-
-174:    /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
-        switch_on_alignment  narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f
-
-177:    /* Check for another line */
-        end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one
- .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
-  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
-  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
- .endif
-
-197:
- .if (flags) & FLAG_SPILL_LINE_VARS
-        add     sp, sp, #LINE_SAVED_REG_COUNT*4
- .endif
-198:
- .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
-  .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4
-  .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4
-        add     sp, sp, #4
- .endif
-
-        cleanup
-
-#ifdef DEBUG_PARAMS
-        add     sp, sp, #9*4 /* junk the debug copy of arguments */
-#endif
-199:
-        pop     {r4-r11, pc}  /* exit */
-
- .ltorg
-
-    .unreq  X
-    .unreq  Y
-    .unreq  DST
-    .unreq  STRIDE_D
-    .unreq  SRC
-    .unreq  STRIDE_S
-    .unreq  MASK
-    .unreq  STRIDE_M
-    .unreq  WK0
-    .unreq  WK1
-    .unreq  WK2
-    .unreq  WK3
-    .unreq  SCRATCH
-    .unreq  ORIG_W
-    .endfunc
-.endm
-
-.macro line_saved_regs  x:vararg
- .set LINE_SAVED_REGS, 0
- .set LINE_SAVED_REG_COUNT, 0
- .irp SAVED_REG,x
-  .ifc "SAVED_REG","Y"
-   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1)
-   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
-  .endif
-  .ifc "SAVED_REG","STRIDE_D"
-   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3)
-   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
-  .endif
-  .ifc "SAVED_REG","STRIDE_S"
-   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5)
-   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
-  .endif
-  .ifc "SAVED_REG","STRIDE_M"
-   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7)
-   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
-  .endif
-  .ifc "SAVED_REG","ORIG_W"
-   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14)
-   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
-  .endif
- .endr
-.endm
-
-.macro nop_macro x:vararg
-.endm
diff --git a/vendor/pixman/pixman/pixman-arm-simd.c b/vendor/pixman/pixman/pixman-arm-simd.c
deleted file mode 100644
index 40f3a9759..000000000
--- a/vendor/pixman/pixman/pixman-arm-simd.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright © 2008 Mozilla Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-#include "pixman-inlines.h"
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888,
-		                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565,
-                                   uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
-                                   uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
-                                   uint16_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565,
-                                   uint32_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
-                                   uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
-                                 uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
-                                 uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
-                                     uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
-                                      uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca,
-                                      uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
-                                        uint16_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
-                                        uint32_t, uint32_t)
-
-void
-pixman_composite_src_n_8888_asm_armv6 (int32_t   w,
-                                       int32_t   h,
-                                       uint32_t *dst,
-                                       int32_t   dst_stride,
-                                       uint32_t  src);
-
-void
-pixman_composite_src_n_0565_asm_armv6 (int32_t   w,
-                                       int32_t   h,
-                                       uint16_t *dst,
-                                       int32_t   dst_stride,
-                                       uint16_t  src);
-
-void
-pixman_composite_src_n_8_asm_armv6 (int32_t   w,
-                                    int32_t   h,
-                                    uint8_t  *dst,
-                                    int32_t   dst_stride,
-                                    uint8_t  src);
-
-static pixman_bool_t
-arm_simd_fill (pixman_implementation_t *imp,
-               uint32_t *               bits,
-               int                      stride, /* in 32-bit words */
-               int                      bpp,
-               int                      x,
-               int                      y,
-               int                      width,
-               int                      height,
-               uint32_t                 _xor)
-{
-    /* stride is always multiple of 32bit units in pixman */
-    uint32_t byte_stride = stride * sizeof(uint32_t);
-
-    switch (bpp)
-    {
-    case 8:
-	pixman_composite_src_n_8_asm_armv6 (
-		width,
-		height,
-		(uint8_t *)(((char *) bits) + y * byte_stride + x),
-		byte_stride,
-		_xor & 0xff);
-	return TRUE;
-    case 16:
-	pixman_composite_src_n_0565_asm_armv6 (
-		width,
-		height,
-		(uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
-		byte_stride / 2,
-		_xor & 0xffff);
-	return TRUE;
-    case 32:
-	pixman_composite_src_n_8888_asm_armv6 (
-		width,
-		height,
-		(uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
-		byte_stride / 4,
-		_xor);
-	return TRUE;
-    default:
-	return FALSE;
-    }
-}
-
-static pixman_bool_t
-arm_simd_blt (pixman_implementation_t *imp,
-              uint32_t *               src_bits,
-              uint32_t *               dst_bits,
-              int                      src_stride, /* in 32-bit words */
-              int                      dst_stride, /* in 32-bit words */
-              int                      src_bpp,
-              int                      dst_bpp,
-              int                      src_x,
-              int                      src_y,
-              int                      dest_x,
-              int                      dest_y,
-              int                      width,
-              int                      height)
-{
-    if (src_bpp != dst_bpp)
-	return FALSE;
-
-    switch (src_bpp)
-    {
-    case 8:
-        pixman_composite_src_8_8_asm_armv6 (
-                width, height,
-                (uint8_t *)(((char *) dst_bits) +
-                dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4,
-                (uint8_t *)(((char *) src_bits) +
-                src_y * src_stride * 4 + src_x * 1), src_stride * 4);
-        return TRUE;
-    case 16:
-	pixman_composite_src_0565_0565_asm_armv6 (
-		width, height,
-		(uint16_t *)(((char *) dst_bits) +
-		dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
-		(uint16_t *)(((char *) src_bits) +
-		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
-	return TRUE;
-    case 32:
-	pixman_composite_src_8888_8888_asm_armv6 (
-		width, height,
-		(uint32_t *)(((char *) dst_bits) +
-		dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
-		(uint32_t *)(((char *) src_bits) +
-		src_y * src_stride * 4 + src_x * 4), src_stride);
-	return TRUE;
-    default:
-	return FALSE;
-    }
-}
-
-static const pixman_fast_path_t arm_simd_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
-
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, armv6_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, armv6_composite_src_x888_8888),
-
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
-
-    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8),
-
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, armv6_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, armv6_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888),
-
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
-
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
-
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
-
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
-
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
-
-    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888),
-    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888),
-    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888),
-    PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888),
-
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
-    SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
-
-    { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths);
-
-    imp->blt = arm_simd_blt;
-    imp->fill = arm_simd_fill;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-arm.c b/vendor/pixman/pixman/pixman-arm.c
deleted file mode 100644
index 288172b62..000000000
--- a/vendor/pixman/pixman/pixman-arm.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-typedef enum
-{
-    ARM_V7		= (1 << 0),
-    ARM_V6		= (1 << 1),
-    ARM_VFP		= (1 << 2),
-    ARM_NEON		= (1 << 3),
-    ARM_IWMMXT		= (1 << 4)
-} arm_cpu_features_t;
-
-#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
-
-#if defined(_MSC_VER)
-
-/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
-#include <windows.h>
-
-extern int pixman_msvc_try_arm_neon_op ();
-extern int pixman_msvc_try_arm_simd_op ();
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-
-    __try
-    {
-	pixman_msvc_try_arm_simd_op ();
-	features |= ARM_V6;
-    }
-    __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
-    {
-    }
-
-    __try
-    {
-	pixman_msvc_try_arm_neon_op ();
-	features |= ARM_NEON;
-    }
-    __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
-    {
-    }
-
-    return features;
-}
-
-#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) /* iOS */
-
-#include "TargetConditionals.h"
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-
-    features |= ARM_V6;
-
-    /* Detection of ARM NEON on iOS is fairly simple because iOS binaries
-     * contain separate executable images for each processor architecture.
-     * So all we have to do is detect the armv7 architecture build. The
-     * operating system automatically runs the armv7 binary for armv7 devices
-     * and the armv6 binary for armv6 devices.
-     */
-#if defined(__ARM_NEON__)
-    features |= ARM_NEON;
-#endif
-
-    return features;
-}
-
-#elif defined(__ANDROID__) || defined(ANDROID) /* Android */
-
-#include <cpu-features.h>
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-    AndroidCpuFamily cpu_family;
-    uint64_t cpu_features;
-
-    cpu_family = android_getCpuFamily();
-    cpu_features = android_getCpuFeatures();
-
-    if (cpu_family == ANDROID_CPU_FAMILY_ARM)
-    {
-	if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7)
-	    features |= ARM_V7;
-
-	if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3)
-	    features |= ARM_VFP;
-
-	if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)
-	    features |= ARM_NEON;
-    }
-
-    return features;
-}
-
-#elif defined (__linux__) /* linux ELF */
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <string.h>
-#include <elf.h>
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-    Elf32_auxv_t aux;
-    int fd;
-
-    fd = open ("/proc/self/auxv", O_RDONLY);
-    if (fd >= 0)
-    {
-	while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
-	{
-	    if (aux.a_type == AT_HWCAP)
-	    {
-		uint32_t hwcap = aux.a_un.a_val;
-
-		/* hardcode these values to avoid depending on specific
-		 * versions of the hwcap header, e.g. HWCAP_NEON
-		 */
-		if ((hwcap & 64) != 0)
-		    features |= ARM_VFP;
-		if ((hwcap & 512) != 0)
-		    features |= ARM_IWMMXT;
-		/* this flag is only present on kernel 2.6.29 */
-		if ((hwcap & 4096) != 0)
-		    features |= ARM_NEON;
-	    }
-	    else if (aux.a_type == AT_PLATFORM)
-	    {
-		const char *plat = (const char*) aux.a_un.a_val;
-
-		if (strncmp (plat, "v7l", 3) == 0)
-		    features |= (ARM_V7 | ARM_V6);
-		else if (strncmp (plat, "v6l", 3) == 0)
-		    features |= ARM_V6;
-	    }
-	}
-	close (fd);
-    }
-
-    return features;
-}
-
-#elif defined (_3DS) /* 3DS homebrew (devkitARM) */
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-
-    features |= ARM_V6;
-
-    return features;
-}
-
-#elif defined (PSP2) || defined (__SWITCH__)
-/* Vita (VitaSDK) or Switch (devkitA64) homebrew */
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    arm_cpu_features_t features = 0;
-
-    features |= ARM_NEON;
-
-    return features;
-}
-
-#else /* Unknown */
-
-static arm_cpu_features_t
-detect_cpu_features (void)
-{
-    return 0;
-}
-
-#endif /* Linux elf */
-
-static pixman_bool_t
-have_feature (arm_cpu_features_t feature)
-{
-    static pixman_bool_t initialized;
-    static arm_cpu_features_t features;
-
-    if (!initialized)
-    {
-	features = detect_cpu_features();
-	initialized = TRUE;
-    }
-
-    return (features & feature) == feature;
-}
-
-#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */
-
-pixman_implementation_t *
-_pixman_arm_get_implementations (pixman_implementation_t *imp)
-{
-#ifdef USE_ARM_SIMD
-    if (!_pixman_disabled ("arm-simd") && have_feature (ARM_V6))
-	imp = _pixman_implementation_create_arm_simd (imp);
-#endif
-
-#ifdef USE_ARM_IWMMXT
-    if (!_pixman_disabled ("arm-iwmmxt") && have_feature (ARM_IWMMXT))
-	imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_ARM_NEON
-    if (!_pixman_disabled ("arm-neon") && have_feature (ARM_NEON))
-	imp = _pixman_implementation_create_arm_neon (imp);
-#endif
-
-#ifdef USE_ARM_A64_NEON
-    /* neon is a part of aarch64 */
-    if (!_pixman_disabled ("arm-neon"))
-        imp = _pixman_implementation_create_arm_neon (imp);
-#endif
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S b/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S
deleted file mode 100644
index 31d103d1d..000000000
--- a/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S
+++ /dev/null
@@ -1,1275 +0,0 @@
-/*
- * Copyright © 2011 SCore Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- * Author:  Taekyun Kim (tkq.kim@samsung.com)
- */
-
-/*
- * This file contains scaled bilinear scanline functions implemented
- * using older siarhei's bilinear macro template.
- *
- * << General scanline function procedures >>
- *  1. bilinear interpolate source pixels
- *  2. load mask pixels
- *  3. load destination pixels
- *  4. duplicate mask to fill whole register
- *  5. interleave source & destination pixels
- *  6. apply mask to source pixels
- *  7. combine source & destination pixels
- *  8, Deinterleave final result
- *  9. store destination pixels
- *
- * All registers with single number (i.e. src0, tmp0) are 64-bits registers.
- * Registers with double numbers(src01, dst01) are 128-bits registers.
- * All temp registers can be used freely outside the code block.
- * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks.
- *
- * Remarks
- *  There can be lots of pipeline stalls inside code block and between code blocks.
- *  Further optimizations will be done by new macro templates using head/tail_head/tail scheme.
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined (__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-.text
-.arch armv8-a
-.altmacro
-.p2align 2
-
-#include "pixman-private.h"
-#include "pixman-arm-asm.h"
-#include "pixman-arma64-neon-asm.h"
-
-/*
- * Bilinear macros from pixman-arm-neon-asm.S
- */
-
-/*
- * Bilinear scaling support code which tries to provide pixel fetching, color
- * format conversion, and interpolation as separate macros which can be used
- * as the basic building blocks for constructing bilinear scanline functions.
- */
-
-.macro bilinear_load_8888 reg1, reg2, tmp
-    asr       WTMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #2
-    ld1       {&reg1&.2s}, [TMP1], STRIDE
-    ld1       {&reg2&.2s}, [TMP1]
-.endm
-
-.macro bilinear_load_0565 reg1, reg2, tmp
-    asr       WTMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    ld1       {&reg2&.s}[0], [TMP1], STRIDE
-    ld1       {&reg2&.s}[1], [TMP1]
-    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_8888 \
-                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
-
-    bilinear_load_8888 reg1, reg2, tmp1
-    umull     &acc1&.8h, &reg1&.8b, v28.8b
-    umlal     &acc1&.8h, &reg2&.8b, v29.8b
-    bilinear_load_8888 reg3, reg4, tmp2
-    umull     &acc2&.8h, &reg3&.8b, v28.8b
-    umlal     &acc2&.8h, &reg4&.8b, v29.8b
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-.endm
-
-.macro vzip reg1, reg2
-    zip1      v24.8b, reg1, reg2
-    zip2      reg2,   reg1, reg2
-    mov       reg1,   v24.8b
-.endm
-
-.macro vuzp reg1, reg2
-    uzp1     v24.8b, reg1, reg2
-    uzp2     reg2,   reg1, reg2
-    mov      reg1,   v24.8b
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_0565 \
-                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
-    asr       WTMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       WTMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&acc2&.s}[0], [TMP1], STRIDE
-    ld1       {&acc2&.s}[2], [TMP2], STRIDE
-    ld1       {&acc2&.s}[1], [TMP1]
-    ld1       {&acc2&.s}[3], [TMP2]
-    convert_0565_to_x888 acc2, reg3, reg2, reg1
-    vzip      &reg1&.8b, &reg3&.8b
-    vzip      &reg2&.8b, &reg4&.8b
-    vzip      &reg3&.8b, &reg4&.8b
-    vzip      &reg1&.8b, &reg2&.8b
-    umull     &acc1&.8h, &reg1&.8b, v28.8b
-    umlal     &acc1&.8h, &reg2&.8b, v29.8b
-    umull     &acc2&.8h, &reg3&.8b, v28.8b
-    umlal     &acc2&.8h, &reg4&.8b, v29.8b
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_0565 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    asr       WTMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       WTMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&xacc2&.s}[0], [TMP1], STRIDE
-    ld1       {&xacc2&.s}[2], [TMP2], STRIDE
-    ld1       {&xacc2&.s}[1], [TMP1]
-    ld1       {&xacc2&.s}[3], [TMP2]
-    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
-    asr       WTMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       WTMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&yacc2&.s}[0], [TMP1], STRIDE
-    vzip      &xreg1&.8b, &xreg3&.8b
-    ld1       {&yacc2&.s}[2], [TMP2], STRIDE
-    vzip      &xreg2&.8b, &xreg4&.8b
-    ld1       {&yacc2&.s}[1], [TMP1]
-    vzip      &xreg3&.8b, &xreg4&.8b
-    ld1       {&yacc2&.s}[3], [TMP2]
-    vzip      &xreg1&.8b, &xreg2&.8b
-    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
-    umull     &xacc1&.8h, &xreg1&.8b, v28.8b
-    vzip      &yreg1&.8b, &yreg3&.8b
-    umlal     &xacc1&.8h, &xreg2&.8b, v29.8b
-    vzip      &yreg2&.8b, &yreg4&.8b
-    umull     &xacc2&.8h, &xreg3&.8b, v28.8b
-    vzip      &yreg3&.8b, &yreg4&.8b
-    umlal     &xacc2&.8h, &xreg4&.8b, v29.8b
-    vzip      &yreg1&.8b, &yreg2&.8b
-    umull     &yacc1&.8h, &yreg1&.8b, v28.8b
-    umlal     &yacc1&.8h, &yreg2&.8b, v29.8b
-    umull     &yacc2&.8h, &yreg3&.8b, v28.8b
-    umlal     &yacc2&.8h, &yreg4&.8b, v29.8b
-.endm
-
-.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
-    st1       {v0.2s, v1.2s}, [OUT], #16
-.elseif numpix == 2
-    st1       {v0.2s}, [OUT], #8
-.elseif numpix == 1
-    st1       {v0.s}[0], [OUT], #4
-.else
-    .error bilinear_store_8888 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_store_0565 numpix, tmp1, tmp2
-    vuzp    v0.8b, v1.8b
-    vuzp    v2.8b, v3.8b
-    vuzp    v1.8b, v3.8b
-    vuzp    v0.8b, v2.8b
-    convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2
-.if numpix == 4
-    st1       {v1.4h}, [OUT], #8
-.elseif numpix == 2
-    st1       {v1.s}[0], [OUT], #4
-.elseif numpix == 1
-    st1       {v1.h}[0], [OUT], #2
-.else
-    .error bilinear_store_0565 numpix is unsupported
-.endif
-.endm
-
-
-/*
- * Macros for loading mask pixels into register 'mask'.
- * dup must be done in somewhere else.
- */
-.macro bilinear_load_mask_x numpix, mask
-.endm
-
-.macro bilinear_load_mask_8 numpix, mask
-.if numpix == 4
-    ld1         {&mask&.s}[0], [MASK], #4
-.elseif numpix == 2
-    ld1         {&mask&.h}[0], [MASK], #2
-.elseif numpix == 1
-    ld1         {&mask&.b}[0], [MASK], #1
-.else
-    .error bilinear_load_mask_8 numpix is unsupported
-.endif
-    prfm        PREFETCH_MODE, [MASK, #prefetch_offset]
-.endm
-
-.macro bilinear_load_mask mask_fmt, numpix, mask
-    bilinear_load_mask_&mask_fmt numpix, mask
-.endm
-
-
-/*
- * Macros for loading destination pixels into register 'dst0' and 'dst1'.
- * Interleave should be done somewhere else.
- */
-.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.if numpix == 4
-    ld1         {&dst0&.2s, &dst1&.2s}, [OUT]
-.elseif numpix == 2
-    ld1         {&dst0&.2s}, [OUT]
-.elseif numpix == 1
-    ld1         {&dst0&.s}[0], [OUT]
-.else
-    .error bilinear_load_dst_8888 numpix is unsupported
-.endif
-    mov         &dst01&.d[0], &dst0&.d[0]
-    mov         &dst01&.d[1], &dst1&.d[0]
-    prfm        PREFETCH_MODE, [OUT, #(prefetch_offset * 4)]
-.endm
-
-.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
-    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
-    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
-    bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
-.endm
-
-/*
- * Macros for duplicating partially loaded mask to fill entire register.
- * We will apply mask to interleaved source pixels, that is
- *  (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3)
- *  (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3)
- * So, we need to duplicate loaded mask into whole register.
- *
- * For two pixel case
- *  (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
- *  (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
- * We can do some optimizations for this including last pixel cases.
- */
-.macro bilinear_duplicate_mask_x numpix, mask
-.endm
-
-.macro bilinear_duplicate_mask_8 numpix, mask
-.if numpix == 4
-    dup         &mask&.2s, &mask&.s[0]
-.elseif numpix == 2
-    dup         &mask&.4h, &mask&.h[0]
-.elseif numpix == 1
-    dup         &mask&.8b, &mask&.b[0]
-.else
-    .error bilinear_duplicate_mask_8 is unsupported
-.endif
-.endm
-
-.macro bilinear_duplicate_mask mask_fmt, numpix, mask
-    bilinear_duplicate_mask_&mask_fmt numpix, mask
-.endm
-
-/*
- * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form.
- * Interleave should be done when maks is enabled or operator is 'over'.
- */
-.macro bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-    vuzp       &src0&.8b, &src1&.8b
-    vuzp       &dst0&.8b, &dst1&.8b
-    vuzp       &src0&.8b, &src1&.8b
-    vuzp       &dst0&.8b, &dst1&.8b
-    mov        &src01&.d[1], &src1&.d[0]
-    mov        &src01&.d[0], &src0&.d[0]
-    mov        &dst01&.d[1], &dst1&.d[0]
-    mov        &dst01&.d[0], &dst0&.d[0]
-.endm
-
-.macro bilinear_interleave_src_dst_x_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_x_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_x_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-    bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_8_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_8_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst_8_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave src0, src1, src01, dst0, dst1, dst01
-.endm
-
-.macro bilinear_interleave_src_dst \
-                mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01
-
-    bilinear_interleave_src_dst_&mask_fmt&_&op \
-                numpix, src0, src1, src01, dst0, dst1, dst01
-.endm
-
-
-/*
- * Macros for applying masks to src pixels. (see combine_mask_u() function)
- * src, dst should be in interleaved form.
- * mask register should be in form (m0, m1, m2, m3).
- */
-.macro bilinear_apply_mask_to_src_x \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-.endm
-
-.macro bilinear_apply_mask_to_src_8 \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-
-    umull           &tmp01&.8h, &src0&.8b, &mask&.8b
-    umull           &tmp23&.8h, &src1&.8b, &mask&.8b
-    /* bubbles */
-    urshr           &tmp45&.8h, &tmp01&.8h, #8
-    urshr           &tmp67&.8h, &tmp23&.8h, #8
-    /* bubbles */
-    raddhn          &src0&.8b, &tmp45&.8h, &tmp01&.8h
-    raddhn          &src1&.8b, &tmp67&.8h, &tmp23&.8h
-    mov             &src01&.d[0], &src0&.d[0]
-    mov             &src01&.d[1], &src1&.d[0]
-.endm
-
-.macro bilinear_apply_mask_to_src \
-                mask_fmt, numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-
-    bilinear_apply_mask_to_src_&mask_fmt \
-                numpix, src0, src1, src01, mask, \
-                tmp01, tmp23, tmp45, tmp67
-.endm
-
-
-/*
- * Macros for combining src and destination pixels.
- * Interleave or not is depending on operator 'op'.
- */
-.macro bilinear_combine_src \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-.endm
-
-.macro bilinear_combine_over \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    dup         &tmp8&.2s, &src1&.s[1]
-    /* bubbles */
-    mvn         &tmp8&.8b, &tmp8&.8b
-    /* bubbles */
-    umull       &tmp01&.8h, &dst0&.8b, &tmp8&.8b
-    /* bubbles */
-    umull       &tmp23&.8h, &dst1&.8b, &tmp8&.8b
-    /* bubbles */
-    urshr       &tmp45&.8h, &tmp01&.8h, #8
-    urshr       &tmp67&.8h, &tmp23&.8h, #8
-    /* bubbles */
-    raddhn      &dst0&.8b, &tmp45&.8h, &tmp01&.8h
-    raddhn      &dst1&.8b, &tmp67&.8h, &tmp23&.8h
-    mov         &dst01&.d[0], &dst0&.d[0]
-    mov         &dst01&.d[1], &dst1&.d[0]
-    /* bubbles */
-    uqadd       &src0&.8b, &dst0&.8b, &src0&.8b
-    uqadd       &src1&.8b, &dst1&.8b, &src1&.8b
-    mov         &src01&.d[0], &src0&.d[0]
-    mov         &src01&.d[1], &src1&.d[0]
-.endm
-
-.macro bilinear_combine_add \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    uqadd       &src0&.8b, &dst0&.8b, &src0&.8b
-    uqadd       &src1&.8b, &dst1&.8b, &src1&.8b
-    mov         &src01&.d[0], &src0&.d[0]
-    mov         &src01&.d[1], &src1&.d[0]
-.endm
-
-.macro bilinear_combine \
-                op, numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-
-    bilinear_combine_&op \
-                numpix, src0, src1, src01, dst0, dst1, dst01, \
-                tmp01, tmp23, tmp45, tmp67, tmp8
-.endm
-
-/*
- * Macros for final deinterleaving of destination pixels if needed.
- */
-.macro bilinear_deinterleave numpix, dst0, dst1, dst01
-    vuzp       &dst0&.8b, &dst1&.8b
-    /* bubbles */
-    vuzp       &dst0&.8b, &dst1&.8b
-    mov        &dst01&.d[0], &dst0&.d[0]
-    mov        &dst01&.d[1], &dst1&.d[0]
-.endm
-
-.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
-    bilinear_deinterleave numpix, dst0, dst1, dst01
-.endm
-
-.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
-    bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
-.endm
-
-
-.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_&src_fmt v0, v1, v2
-    bilinear_load_mask mask_fmt, 1, v4
-    bilinear_load_dst dst_fmt, op, 1, v18, v19, v9
-    umull     v2.8h, v0.8b, v28.8b
-    umlal     v2.8h, v1.8b, v29.8b
-    /* 5 cycles bubble */
-    ushll     v0.4s, v2.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v2.4h, v15.h[0]
-    umlal2    v0.4s, v2.8h, v15.h[0]
-    /* 5 cycles bubble */
-    bilinear_duplicate_mask mask_fmt, 1, v4
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    /* 3 cycles bubble */
-    xtn       v0.8b, v0.8h
-    /* 1 cycle bubble */
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 1, v0, v1, v0, v18, v19, v9
-    bilinear_apply_mask_to_src \
-                mask_fmt, 1, v0, v1, v0, v4, \
-                v3, v8, v10, v11
-    bilinear_combine \
-                op, 1, v0, v1, v0, v18, v19, v9, \
-                v3, v8, v10, v11, v5
-    bilinear_deinterleave_dst mask_fmt, op, 1, v0, v1, v0
-    bilinear_store_&dst_fmt 1, v17, v18
-.endm
-
-.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_and_vertical_interpolate_two_&src_fmt \
-                v1, v11, v18, v19, v20, v21, v22, v23
-    bilinear_load_mask mask_fmt, 2, v4
-    bilinear_load_dst dst_fmt, op, 2, v18, v19, v9
-    ushll     v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v1.4h, v15.h[0]
-    umlal2    v0.4s, v1.8h, v15.h[0]
-    ushll     v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v10.4s, v11.4h, v15.h[4]
-    umlal2    v10.4s, v11.8h, v15.h[4]
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    bilinear_duplicate_mask mask_fmt, 2, v4
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-    xtn       v0.8b, v0.8h
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 2, v0, v1, v0, v18, v19, v9
-    bilinear_apply_mask_to_src \
-                mask_fmt, 2, v0, v1, v0, v4, \
-                v3, v8, v10, v11
-    bilinear_combine \
-                op, 2, v0, v1, v0, v18, v19, v9, \
-                v3, v8, v10, v11, v5
-    bilinear_deinterleave_dst mask_fmt, op, 2, v0, v1, v0
-    bilinear_store_&dst_fmt 2, v16, v17
-.endm
-
-.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
-    bilinear_load_and_vertical_interpolate_four_&src_fmt \
-                v1, v11, v4,  v5,  v6,  v7,  v22, v23 \
-                v3, v9,  v16, v17, v20, v21, v18, v19
-    prfm      PREFETCH_MODE, [TMP1, PF_OFFS]
-    sub       TMP1, TMP1, STRIDE
-    prfm      PREFETCH_MODE, [TMP1, PF_OFFS]
-    ushll     v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v1.4h, v15.h[0]
-    umlal2    v0.4s, v1.8h, v15.h[0]
-    ushll     v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v10.4s, v11.4h, v15.h[4]
-    umlal2    v10.4s, v11.8h, v15.h[4]
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    ushll     v2.4s, v3.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v2.4s, v3.4h, v15.h[0]
-    umlal2    v2.4s, v3.8h, v15.h[0]
-    ushll     v8.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v8.4s, v9.4h, v15.h[4]
-    umlal2    v8.4s, v9.8h, v15.h[4]
-    add       v12.8h, v12.8h, v13.8h
-    shrn      v0.4h,  v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn      v2.4h,  v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v2.8h,  v8.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    bilinear_load_mask mask_fmt, 4, v4
-    bilinear_duplicate_mask mask_fmt, 4, v4
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    xtn       v0.8b, v0.8h
-    xtn       v1.8b, v2.8h
-    add       v12.8h, v12.8h, v13.8h
-    bilinear_load_dst dst_fmt, op, 4, v2, v3, v21
-    bilinear_interleave_src_dst \
-                mask_fmt, op, 4, v0, v1, v0, v2, v3, v11
-    bilinear_apply_mask_to_src \
-                mask_fmt, 4, v0, v1, v0, v4, \
-                v6, v8, v9, v10
-    bilinear_combine \
-                op, 4, v0, v1, v0, v2, v3, v1, \
-                v6, v8, v9, v10, v23
-    bilinear_deinterleave_dst mask_fmt, op, 4, v0, v1, v0
-    bilinear_store_&dst_fmt 4, v6, v7
-.endm
-
-.set BILINEAR_FLAG_USE_MASK,        1
-.set BILINEAR_FLAG_USE_ALL_NEON_REGS,    2
-
-/*
- * Main template macro for generating NEON optimized bilinear scanline functions.
- *
- * Bilinear scanline generator macro take folling arguments:
- *  fname            - name of the function to generate
- *  src_fmt            - source color format (8888 or 0565)
- *  dst_fmt            - destination color format (8888 or 0565)
- *  src/dst_bpp_shift        - (1 << bpp_shift) is the size of src/dst pixel in bytes
- *  process_last_pixel        - code block that interpolate one pixel and does not
- *                  update horizontal weight
- *  process_two_pixels        - code block that interpolate two pixels and update
- *                  horizontal weight
- *  process_four_pixels        - code block that interpolate four pixels and update
- *                  horizontal weight
- *  process_pixblock_head    - head part of middle loop
- *  process_pixblock_tail    - tail part of middle loop
- *  process_pixblock_tail_head    - tail_head of middle loop
- *  pixblock_size        - number of pixels processed in a single middle loop
- *  prefetch_distance        - prefetch in the source image by that many pixels ahead
- */
-
-.macro generate_bilinear_scanline_func \
-    fname, \
-    src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
-    bilinear_process_last_pixel, \
-    bilinear_process_two_pixels, \
-    bilinear_process_four_pixels, \
-    bilinear_process_pixblock_head, \
-    bilinear_process_pixblock_tail, \
-    bilinear_process_pixblock_tail_head, \
-    pixblock_size, \
-    prefetch_distance, \
-    flags
-
-pixman_asm_function fname
-.if pixblock_size == 8
-.elseif pixblock_size == 4
-.else
-    .error unsupported pixblock size
-.endif
-
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
-    OUT       .req    x0
-    TOP       .req    x1
-    BOTTOM    .req    x2
-    WT        .req    x3
-    WWT       .req    w3
-    WB        .req    x4
-    WWB       .req    w4
-    X         .req    w5
-    UX        .req    w6
-    WIDTH     .req    x7
-    TMP1      .req    x10
-    WTMP1     .req    w10
-    TMP2      .req    x11
-    WTMP2     .req    w11
-    PF_OFFS   .req    x12
-    TMP3      .req    x13
-    WTMP3     .req    w13
-    TMP4      .req    x14
-    WTMP4     .req    w14
-    STRIDE    .req    x15
-    DUMMY     .req    x30
-
-    stp       x29, x30, [sp, -16]!
-    mov       x29, sp
-    sub       sp, sp, 112
-    sub       x29, x29, 64
-    st1       {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    st1       {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    stp       x10, x11, [x29, -80]
-    stp       x12, x13, [x29, -96]
-    stp       x14, x15, [x29, -112]
-.else
-    OUT       .req      x0
-    MASK      .req      x1
-    TOP       .req      x2
-    BOTTOM    .req      x3
-    WT        .req      x4
-    WWT       .req      w4
-    WB        .req      x5
-    WWB       .req      w5
-    X         .req      w6
-    UX        .req      w7
-    WIDTH     .req      x8
-    TMP1      .req      x10
-    WTMP1     .req      w10
-    TMP2      .req      x11
-    WTMP2     .req      w11
-    PF_OFFS   .req      x12
-    TMP3      .req      x13
-    WTMP3     .req      w13
-    TMP4      .req      x14
-    WTMP4     .req      w14
-    STRIDE    .req      x15
-    DUMMY     .req      x30
-
-    .set prefetch_offset, prefetch_distance
-
-    stp      x29, x30, [sp, -16]!
-    mov      x29, sp
-    sub      x29, x29, 64
-    st1      {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    st1      {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    stp      x10, x11, [x29, -80]
-    stp      x12, x13, [x29, -96]
-    stp      x14, x15, [x29, -112]
-    str      x8, [x29, -120]
-    ldr      w8, [x29, 16]
-    sub      sp, sp, 120
-.endif
-
-    mov      WTMP1, #prefetch_distance
-    umull    PF_OFFS, WTMP1, UX
-
-    sub      STRIDE, BOTTOM, TOP
-    .unreq   BOTTOM
-
-    cmp      WIDTH, #0
-    ble      300f
-
-    dup      v12.8h, X
-    dup      v13.8h, UX
-    dup      v28.8b, WWT
-    dup      v29.8b, WWB
-    mov      v25.d[0], v12.d[1]
-    mov      v26.d[0], v13.d[0]
-    add      v25.4h, v25.4h, v26.4h
-    mov      v12.d[1], v25.d[0]
-
-    /* ensure good destination alignment  */
-    cmp       WIDTH, #1
-    blt       100f
-    tst       OUT, #(1 << dst_bpp_shift)
-    beq       100f
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-    bilinear_process_last_pixel
-    sub       WIDTH, WIDTH, #1
-100:
-    add       v13.8h, v13.8h, v13.8h
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-
-    cmp       WIDTH, #2
-    blt       100f
-    tst       OUT, #(1 << (dst_bpp_shift + 1))
-    beq       100f
-    bilinear_process_two_pixels
-    sub       WIDTH, WIDTH, #2
-100:
-.if pixblock_size == 8
-    cmp       WIDTH, #4
-    blt       100f
-    tst       OUT, #(1 << (dst_bpp_shift + 2))
-    beq       100f
-    bilinear_process_four_pixels
-    sub       WIDTH, WIDTH, #4
-100:
-.endif
-    subs      WIDTH, WIDTH, #pixblock_size
-    blt       100f
-    asr       PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
-    bilinear_process_pixblock_head
-    subs      WIDTH, WIDTH, #pixblock_size
-    blt       500f
-0:
-    bilinear_process_pixblock_tail_head
-    subs      WIDTH, WIDTH, #pixblock_size
-    bge       0b
-500:
-    bilinear_process_pixblock_tail
-100:
-.if pixblock_size == 8
-    tst       WIDTH, #4
-    beq       200f
-    bilinear_process_four_pixels
-200:
-.endif
-    /* handle the remaining trailing pixels */
-    tst       WIDTH, #2
-    beq       200f
-    bilinear_process_two_pixels
-200:
-    tst       WIDTH, #1
-    beq       300f
-    bilinear_process_last_pixel
-300:
-
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
-    sub       x29, x29, 64
-    ld1       {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1       {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp       x10, x11, [x29, -80]
-    ldp       x12, x13, [x29, -96]
-    ldp       x14, x15, [x29, -112]
-    mov       sp, x29
-    ldp       x29, x30, [sp], 16
-.else
-    sub       x29, x29, 64
-    ld1       {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1       {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp       x10, x11, [x29, -80]
-    ldp       x12, x13, [x29, -96]
-    ldp       x14, x15, [x29, -112]
-    ldr       x8, [x29, -120]
-    mov       sp, x29
-    ldp       x29, x30, [sp], 16
-.endif
-    ret
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WWT
-    .unreq    WB
-    .unreq    WWB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    TMP1
-    .unreq    WTMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
-    .unreq    MASK
-.endif
-
-.endfunc
-
-.endm
-
-/* src_8888_8_8888 */
-.macro bilinear_src_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 8888, src
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_head
-    bilinear_src_8888_8_8888_process_four_pixels
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_tail
-.endm
-
-.macro bilinear_src_8888_8_8888_process_pixblock_tail_head
-    bilinear_src_8888_8_8888_process_pixblock_tail
-    bilinear_src_8888_8_8888_process_pixblock_head
-.endm
-
-/* src_8888_8_0565 */
-.macro bilinear_src_8888_8_0565_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 0565, src
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_head
-    bilinear_src_8888_8_0565_process_four_pixels
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_tail
-.endm
-
-.macro bilinear_src_8888_8_0565_process_pixblock_tail_head
-    bilinear_src_8888_8_0565_process_pixblock_tail
-    bilinear_src_8888_8_0565_process_pixblock_head
-.endm
-
-/* src_0565_8_x888 */
-.macro bilinear_src_0565_8_x888_process_last_pixel
-    bilinear_interpolate_last_pixel 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_two_pixels
-    bilinear_interpolate_two_pixels 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_four_pixels
-    bilinear_interpolate_four_pixels 0565, 8, 8888, src
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_head
-    bilinear_src_0565_8_x888_process_four_pixels
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_tail
-.endm
-
-.macro bilinear_src_0565_8_x888_process_pixblock_tail_head
-    bilinear_src_0565_8_x888_process_pixblock_tail
-    bilinear_src_0565_8_x888_process_pixblock_head
-.endm
-
-/* src_0565_8_0565 */
-.macro bilinear_src_0565_8_0565_process_last_pixel
-    bilinear_interpolate_last_pixel 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_two_pixels
-    bilinear_interpolate_two_pixels 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_four_pixels
-    bilinear_interpolate_four_pixels 0565, 8, 0565, src
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_head
-    bilinear_src_0565_8_0565_process_four_pixels
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_tail
-.endm
-
-.macro bilinear_src_0565_8_0565_process_pixblock_tail_head
-    bilinear_src_0565_8_0565_process_pixblock_tail
-    bilinear_src_0565_8_0565_process_pixblock_head
-.endm
-
-/* over_8888_8888 */
-.macro bilinear_over_8888_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, x, 8888, over
-.endm
-
-.macro bilinear_over_8888_8888_process_pixblock_head
-    asr         WTMP1, X, #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, lsl #2
-    asr         WTMP2, X, #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, lsl #2
-
-    ld1         {v22.2s}, [TMP1], STRIDE
-    ld1         {v23.2s}, [TMP1]
-    asr         WTMP3, X, #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, lsl #2
-    umull       v8.8h, v22.8b, v28.8b
-    umlal       v8.8h, v23.8b, v29.8b
-
-    ld1         {v22.2s}, [TMP2], STRIDE
-    ld1         {v23.2s}, [TMP2]
-    asr         WTMP4, X, #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, lsl #2
-    umull       v9.8h, v22.8b, v28.8b
-    umlal       v9.8h, v23.8b, v29.8b
-
-    ld1         {v22.2s}, [TMP3], STRIDE
-    ld1         {v23.2s}, [TMP3]
-    umull       v10.8h, v22.8b, v28.8b
-    umlal       v10.8h, v23.8b, v29.8b
-
-    ushll       v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl       v0.4s, v8.4h, v15.h[0]
-    umlal2      v0.4s, v8.8h, v15.h[0]
-
-    prfm        PREFETCH_MODE, [TMP4, PF_OFFS]
-    ld1         {v16.2s}, [TMP4], STRIDE
-    ld1         {v17.2s}, [TMP4]
-    prfm        PREFETCH_MODE, [TMP4, PF_OFFS]
-    umull       v11.8h, v16.8b, v28.8b
-    umlal       v11.8h, v17.8b, v29.8b
-
-    ushll       v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl       v1.4s, v9.4h, v15.h[4]
-    umlal2      v1.4s, v9.8h, v15.h[4]
-    ushr        v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add         v12.8h, v12.8h, v13.8h
-.endm
-
-.macro bilinear_over_8888_8888_process_pixblock_tail
-    ushll       v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl       v2.4s, v10.4h, v15.h[0]
-    umlal2      v2.4s, v10.8h, v15.h[0]
-    ushll       v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl       v3.4s, v11.4h, v15.h[4]
-    umlal2      v3.4s, v11.8h, v15.h[4]
-    shrn        v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2       v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn        v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    ushr        v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    shrn2       v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    xtn         v6.8b, v0.8h
-    xtn         v7.8b, v2.8h
-    ld1         {v2.2s, v3.2s}, [OUT]
-    prfm        PREFETCH_MODE, [OUT, #(prefetch_offset * 4)]
-    vuzp        v6.8b, v7.8b
-    vuzp        v2.8b, v3.8b
-    vuzp        v6.8b, v7.8b
-    vuzp        v2.8b, v3.8b
-    dup         v4.2s, v7.s[1]
-    mvn         v4.8b, v4.8b
-    umull       v11.8h, v2.8b, v4.8b
-    umull       v2.8h,  v3.8b, v4.8b
-    urshr       v1.8h, v11.8h, #8
-    urshr       v10.8h, v2.8h, #8
-    raddhn      v3.8b, v10.8h, v2.8h
-    raddhn      v2.8b, v1.8h, v11.8h
-    uqadd       v6.8b, v2.8b,  v6.8b
-    uqadd       v7.8b, v3.8b,  v7.8b
-    vuzp        v6.8b, v7.8b
-    vuzp        v6.8b, v7.8b
-    add         v12.8h, v12.8h, v13.8h
-    st1         {v6.2s, v7.2s}, [OUT], #16
-.endm
-
-.macro bilinear_over_8888_8888_process_pixblock_tail_head
-                                            ushll       v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS
-    asr         WTMP1, X, #16
-    add         X, X, UX
-    add         TMP1, TOP, TMP1, lsl #2
-                                            umlsl       v2.4s, v10.4h, v15.h[0]
-    asr         WTMP2, X, #16
-    add         X, X, UX
-    add         TMP2, TOP, TMP2, lsl #2
-                                            umlal2      v2.4s, v10.8h, v15.h[0]
-                                            ushll       v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    ld1         {v20.2s}, [TMP1], STRIDE
-                                            umlsl       v3.4s, v11.4h, v15.h[4]
-                                            umlal2      v3.4s, v11.8h, v15.h[4]
-    ld1         {v21.2s}, [TMP1]
-    umull       v8.8h, v20.8b, v28.8b
-    umlal       v8.8h, v21.8b, v29.8b
-                                            shrn        v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            shrn2       v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            shrn        v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            ushr        v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    ld1         {v22.2s}, [TMP2], STRIDE
-                                            shrn2       v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-                                            xtn         v6.8b, v0.8h
-    ld1         {v23.2s}, [TMP2]
-    umull       v9.8h, v22.8b, v28.8b
-    asr         WTMP3, X, #16
-    add         X, X, UX
-    add         TMP3, TOP, TMP3, lsl #2
-    asr         WTMP4, X, #16
-    add         X, X, UX
-    add         TMP4, TOP, TMP4, lsl #2
-    umlal       v9.8h, v23.8b, v29.8b
-                                            xtn         v7.8b, v2.8h
-                                            ld1         {v2.2s, v3.2s}, [OUT]
-                                            prfm        PREFETCH_MODE, [OUT, PF_OFFS]
-    ld1         {v22.2s}, [TMP3], STRIDE
-                                            vuzp        v6.8b, v7.8b
-                                            vuzp        v2.8b, v3.8b
-                                            vuzp        v6.8b, v7.8b
-                                            vuzp        v2.8b, v3.8b
-                                            dup         v4.2s, v7.s[1]
-    ld1         {v23.2s}, [TMP3]
-                                            mvn         v4.8b, v4.8b
-    umull       v10.8h, v22.8b, v28.8b
-    umlal       v10.8h, v23.8b, v29.8b
-                                            umull       v11.8h, v2.8b, v4.8b
-                                            umull        v2.8h, v3.8b, v4.8b
-    ushll       v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl       v0.4s, v8.4h, v15.h[0]
-                                            urshr       v1.8h, v11.8h, #8
-    umlal2      v0.4s, v8.8h, v15.h[0]
-                                            urshr       v8.8h, v2.8h, #8
-                                            raddhn      v3.8b, v8.8h, v2.8h
-                                            raddhn      v2.8b, v1.8h, v11.8h
-    prfm        PREFETCH_MODE, [TMP4, PF_OFFS]
-    ld1         {v16.2s}, [TMP4], STRIDE
-                                            uqadd       v6.8b, v2.8b, v6.8b
-                                            uqadd       v7.8b, v3.8b, v7.8b
-    ld1         {v17.2s}, [TMP4]
-    prfm        PREFETCH_MODE, [TMP4, PF_OFFS]
-    umull       v11.8h, v16.8b, v28.8b
-    umlal       v11.8h, v17.8b, v29.8b
-                                            vuzp        v6.8b, v7.8b
-    ushll       v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-                                            vuzp        v6.8b, v7.8b
-    umlsl       v1.4s, v9.4h, v15.h[4]
-                                            add         v12.8h, v12.8h, v13.8h
-    umlal2      v1.4s, v9.8h, v15.h[4]
-    ushr        v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add         v12.8h, v12.8h, v13.8h
-                                            st1         {v6.2s, v7.2s}, [OUT], #16
-.endm
-
-/* over_8888_8_8888 */
-.macro bilinear_over_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, over
-.endm
-
-.macro bilinear_over_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, over
-.endm
-
-.macro bilinear_over_8888_8_8888_process_four_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, over
-    bilinear_interpolate_two_pixels 8888, 8, 8888, over
-.endm
-
-.macro bilinear_over_8888_8_8888_process_pixblock_head
-    bilinear_over_8888_8_8888_process_four_pixels
-.endm
-
-.macro bilinear_over_8888_8_8888_process_pixblock_tail
-.endm
-
-.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
-     bilinear_over_8888_8_8888_process_pixblock_tail
-     bilinear_over_8888_8_8888_process_pixblock_head
-.endm
-
-/* add_8888_8888 */
-.macro bilinear_add_8888_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, x, 8888, add
-.endm
-
-.macro bilinear_add_8888_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, x, 8888, add
-.endm
-
-.macro bilinear_add_8888_8888_process_four_pixels
-    bilinear_interpolate_two_pixels 8888, x, 8888, add
-    bilinear_interpolate_two_pixels 8888, x, 8888, add
-.endm
-
-.macro bilinear_add_8888_8888_process_pixblock_head
-    bilinear_add_8888_8888_process_four_pixels
-.endm
-
-.macro bilinear_add_8888_8888_process_pixblock_tail
-.endm
-
-.macro bilinear_add_8888_8888_process_pixblock_tail_head
-    bilinear_add_8888_8888_process_pixblock_tail
-    bilinear_add_8888_8888_process_pixblock_head
-.endm
-
-/* add_8888_8_8888 */
-.macro bilinear_add_8888_8_8888_process_last_pixel
-    bilinear_interpolate_last_pixel 8888, 8, 8888, add
-.endm
-
-.macro bilinear_add_8888_8_8888_process_two_pixels
-    bilinear_interpolate_two_pixels 8888, 8, 8888, add
-.endm
-
-.macro bilinear_add_8888_8_8888_process_four_pixels
-    bilinear_interpolate_four_pixels 8888, 8, 8888, add
-.endm
-
-.macro bilinear_add_8888_8_8888_process_pixblock_head
-    bilinear_add_8888_8_8888_process_four_pixels
-.endm
-
-.macro bilinear_add_8888_8_8888_process_pixblock_tail
-.endm
-
-.macro bilinear_add_8888_8_8888_process_pixblock_tail_head
-    bilinear_add_8888_8_8888_process_pixblock_tail
-    bilinear_add_8888_8_8888_process_pixblock_head
-.endm
-
-
-/* Bilinear scanline functions */
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_src_8888_8_8888_process_last_pixel, \
-    bilinear_src_8888_8_8888_process_two_pixels, \
-    bilinear_src_8888_8_8888_process_four_pixels, \
-    bilinear_src_8888_8_8888_process_pixblock_head, \
-    bilinear_src_8888_8_8888_process_pixblock_tail, \
-    bilinear_src_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
-    8888, 0565, 2, 1, \
-    bilinear_src_8888_8_0565_process_last_pixel, \
-    bilinear_src_8888_8_0565_process_two_pixels, \
-    bilinear_src_8888_8_0565_process_four_pixels, \
-    bilinear_src_8888_8_0565_process_pixblock_head, \
-    bilinear_src_8888_8_0565_process_pixblock_tail, \
-    bilinear_src_8888_8_0565_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
-    0565, 8888, 1, 2, \
-    bilinear_src_0565_8_x888_process_last_pixel, \
-    bilinear_src_0565_8_x888_process_two_pixels, \
-    bilinear_src_0565_8_x888_process_four_pixels, \
-    bilinear_src_0565_8_x888_process_pixblock_head, \
-    bilinear_src_0565_8_x888_process_pixblock_tail, \
-    bilinear_src_0565_8_x888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
-    0565, 0565, 1, 1, \
-    bilinear_src_0565_8_0565_process_last_pixel, \
-    bilinear_src_0565_8_0565_process_two_pixels, \
-    bilinear_src_0565_8_0565_process_four_pixels, \
-    bilinear_src_0565_8_0565_process_pixblock_head, \
-    bilinear_src_0565_8_0565_process_pixblock_tail, \
-    bilinear_src_0565_8_0565_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_over_8888_8888_process_last_pixel, \
-    bilinear_over_8888_8888_process_two_pixels, \
-    bilinear_over_8888_8888_process_four_pixels, \
-    bilinear_over_8888_8888_process_pixblock_head, \
-    bilinear_over_8888_8888_process_pixblock_tail, \
-    bilinear_over_8888_8888_process_pixblock_tail_head, \
-    4, 28, 0
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_over_8888_8_8888_process_last_pixel, \
-    bilinear_over_8888_8_8888_process_two_pixels, \
-    bilinear_over_8888_8_8888_process_four_pixels, \
-    bilinear_over_8888_8_8888_process_pixblock_head, \
-    bilinear_over_8888_8_8888_process_pixblock_tail, \
-    bilinear_over_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_add_8888_8888_process_last_pixel, \
-    bilinear_add_8888_8888_process_two_pixels, \
-    bilinear_add_8888_8888_process_four_pixels, \
-    bilinear_add_8888_8888_process_pixblock_head, \
-    bilinear_add_8888_8888_process_pixblock_tail, \
-    bilinear_add_8888_8888_process_pixblock_tail_head, \
-    4, 28, 0
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
-    8888, 8888, 2, 2, \
-    bilinear_add_8888_8_8888_process_last_pixel, \
-    bilinear_add_8888_8_8888_process_two_pixels, \
-    bilinear_add_8888_8_8888_process_four_pixels, \
-    bilinear_add_8888_8_8888_process_pixblock_head, \
-    bilinear_add_8888_8_8888_process_pixblock_tail, \
-    bilinear_add_8888_8_8888_process_pixblock_tail_head, \
-    4, 28, BILINEAR_FLAG_USE_MASK
diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm.S b/vendor/pixman/pixman/pixman-arma64-neon-asm.S
deleted file mode 100644
index 774d98d38..000000000
--- a/vendor/pixman/pixman/pixman-arma64-neon-asm.S
+++ /dev/null
@@ -1,3704 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains implementations of NEON optimized pixel processing
- * functions. There is no full and detailed tutorial, but some functions
- * (those which are exposing some new or interesting features) are
- * extensively commented and can be used as examples.
- *
- * You may want to have a look at the comments for following functions:
- *  - pixman_composite_over_8888_0565_asm_neon
- *  - pixman_composite_over_n_8_0565_asm_neon
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-.text
-.arch armv8-a
-
-.altmacro
-.p2align 2
-
-#include "pixman-private.h"
-#include "pixman-arm-asm.h"
-#include "pixman-arma64-neon-asm.h"
-
-/* Global configuration options and preferences */
-
-/*
- * The code can optionally make use of unaligned memory accesses to improve
- * performance of handling leading/trailing pixels for each scanline.
- * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
- * example in linux if unaligned memory accesses are not configured to
- * generate.exceptions.
- */
-.set RESPECT_STRICT_ALIGNMENT, 1
-
-/*
- * Set default prefetch type. There is a choice between the following options:
- *
- * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
- * as NOP to workaround some HW bugs or for whatever other reason)
- *
- * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
- * advanced prefetch intruduces heavy overhead)
- *
- * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
- * which can run ARM and NEON instructions simultaneously so that extra ARM
- * instructions do not add (many) extra cycles, but improve prefetch efficiency)
- *
- * Note: some types of function can't support advanced prefetch and fallback
- *       to simple one (those which handle 24bpp pixels)
- */
-.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
-
-/* Prefetch distance in pixels for simple prefetch */
-.set PREFETCH_DISTANCE_SIMPLE, 64
-
-/*
- * Implementation of pixman_composite_over_8888_0565_asm_neon
- *
- * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
- * performs OVER compositing operation. Function fast_composite_over_8888_0565
- * from pixman-fast-path.c does the same in C and can be used as a reference.
- *
- * First we need to have some NEON assembly code which can do the actual
- * operation on the pixels and provide it to the template macro.
- *
- * Template macro quite conveniently takes care of emitting all the necessary
- * code for memory reading and writing (including quite tricky cases of
- * handling unaligned leading/trailing pixels), so we only need to deal with
- * the data in NEON registers.
- *
- * NEON registers allocation in general is recommented to be the following:
- * v0,  v1,  v2,  v3  - contain loaded source pixel data
- * v4,  v5,  v6,  v7  - contain loaded destination pixels (if they are needed)
- * v24, v25, v26, v27 - contain loading mask pixel data (if mask is used)
- * v28, v29, v30, v31 - place for storing the result (destination pixels)
- *
- * As can be seen above, four 64-bit NEON registers are used for keeping
- * intermediate pixel data and up to 8 pixels can be processed in one step
- * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
- *
- * This particular function uses the following registers allocation:
- * v0,  v1,  v2,  v3  - contain loaded source pixel data
- * v4,  v5            - contain loaded destination pixels (they are needed)
- * v28, v29           - place for storing the result (destination pixels)
- */
-
-/*
- * Step one. We need to have some code to do some arithmetics on pixel data.
- * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
- * back-to-back, they take pixel data from {v0, v1, v2, v3} and {v4, v5},
- * perform all the needed calculations and write the result to {v28, v29}.
- * The rationale for having two macros and not just one will be explained
- * later. In practice, any single monolitic function which does the work can
- * be split into two parts in any arbitrary way without affecting correctness.
- *
- * There is one special trick here too. Common template macro can optionally
- * make our life a bit easier by doing R, G, B, A color components
- * deinterleaving for 32bpp pixel formats (and this feature is used in
- * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
- * instead of having 8 packed pixels in {v0, v1, v2, v3} registers, we
- * actually use v0 register for blue channel (a vector of eight 8-bit
- * values), v1 register for green, v2 for red and v3 for alpha. This
- * simple conversion can be also done with a few NEON instructions:
- *
- * Packed to planar conversion: // vuzp8 is a wrapper macro
- *  vuzp8 v0, v1
- *  vuzp8 v2, v3
- *  vuzp8 v1, v3
- *  vuzp8 v0, v2
- *
- * Planar to packed conversion: // vzip8 is a wrapper macro
- *  vzip8 v0, v2
- *  vzip8 v1, v3
- *  vzip8 v2, v3
- *  vzip8 v0, v1
- *
- * But pixel can be loaded directly in planar format using LD4 / b NEON
- * instruction. It is 1 cycle slower than LD1 / s, so this is not always
- * desirable, that's why deinterleaving is optional.
- *
- * But anyway, here is the code:
- */
-
-.macro pixman_composite_over_8888_0565_process_pixblock_head
-    /* convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format
-       and put data into v6 - red, v7 - green, v30 - blue */
-    mov         v4.d[1], v5.d[0]
-    shrn        v6.8b, v4.8h, #8
-    shrn        v7.8b, v4.8h, #3
-    sli         v4.8h, v4.8h, #5
-    sri         v6.8b, v6.8b, #5
-    mvn         v3.8b, v3.8b      /* invert source alpha */
-    sri         v7.8b, v7.8b, #6
-    shrn        v30.8b, v4.8h, #2
-    /* now do alpha blending, storing results in 8-bit planar format
-       into v20 - red, v23 - green, v22 - blue */
-    umull       v10.8h, v3.8b, v6.8b
-    umull       v11.8h, v3.8b, v7.8b
-    umull       v12.8h, v3.8b, v30.8b
-    urshr       v17.8h, v10.8h, #8
-    urshr       v18.8h, v11.8h, #8
-    urshr       v19.8h, v12.8h, #8
-    raddhn      v20.8b, v10.8h, v17.8h
-    raddhn      v23.8b, v11.8h, v18.8h
-    raddhn      v22.8b, v12.8h, v19.8h
-.endm
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail
-    /* ... continue alpha blending */
-    uqadd       v17.8b, v2.8b, v20.8b
-    uqadd       v18.8b, v0.8b, v22.8b
-    uqadd       v19.8b, v1.8b, v23.8b
-    /* convert the result to r5g6b5 and store it into {v14} */
-    ushll       v14.8h, v17.8b, #7
-    sli         v14.8h, v14.8h, #1
-    ushll       v8.8h, v19.8b, #7
-    sli         v8.8h, v8.8h, #1
-    ushll       v9.8h, v18.8b, #7
-    sli         v9.8h, v9.8h, #1
-    sri         v14.8h, v8.8h, #5
-    sri         v14.8h, v9.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-/*
- * OK, now we got almost everything that we need. Using the above two
- * macros, the work can be done right. But now we want to optimize
- * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
- * a lot from good code scheduling and software pipelining.
- *
- * Let's construct some code, which will run in the core main loop.
- * Some pseudo-code of the main loop will look like this:
- *   head
- *   while (...) {
- *     tail
- *     head
- *   }
- *   tail
- *
- * It may look a bit weird, but this setup allows to hide instruction
- * latencies better and also utilize dual-issue capability more
- * efficiently (make pairs of load-store and ALU instructions).
- *
- * So what we need now is a '*_tail_head' macro, which will be used
- * in the core main loop. A trivial straightforward implementation
- * of this macro would look like this:
- *
- *   pixman_composite_over_8888_0565_process_pixblock_tail
- *   st1         {v28.4h, v29.4h}, [DST_W], #32
- *   ld1         {v4.4h, v5.4h}, [DST_R], #16
- *   ld4         {v0.2s, v1.2s, v2.2s, v3.2s}, [SRC], #32
- *   pixman_composite_over_8888_0565_process_pixblock_head
- *   cache_preload 8, 8
- *
- * Now it also got some VLD/VST instructions. We simply can't move from
- * processing one block of pixels to the other one with just arithmetics.
- * The previously processed data needs to be written to memory and new
- * data needs to be fetched. Fortunately, this main loop does not deal
- * with partial leading/trailing pixels and can load/store a full block
- * of pixels in a bulk. Additionally, destination buffer is already
- * 16 bytes aligned here (which is good for performance).
- *
- * New things here are DST_R, DST_W, SRC and MASK identifiers. These
- * are the aliases for ARM registers which are used as pointers for
- * accessing data. We maintain separate pointers for reading and writing
- * destination buffer (DST_R and DST_W).
- *
- * Another new thing is 'cache_preload' macro. It is used for prefetching
- * data into CPU L2 cache and improve performance when dealing with large
- * images which are far larger than cache size. It uses one argument
- * (actually two, but they need to be the same here) - number of pixels
- * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
- * details about this macro. Moreover, if good performance is needed
- * the code from this macro needs to be copied into '*_tail_head' macro
- * and mixed with the rest of code for optimal instructions scheduling.
- * We are actually doing it below.
- *
- * Now after all the explanations, here is the optimized code.
- * Different instruction streams (originaling from '*_head', '*_tail'
- * and 'cache_preload' macro) use different indentation levels for
- * better readability. Actually taking the code from one of these
- * indentation levels and ignoring a few LD/ST instructions would
- * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
- * macro!
- */
-
-#if 1
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
-        uqadd       v17.8b, v2.8b, v20.8b
-    ld1         {v4.4h, v5.4h}, [DST_R], #16
-    mov         v4.d[1], v5.d[0]
-        uqadd       v18.8b, v0.8b, v22.8b
-        uqadd       v19.8b, v1.8b, v23.8b
-    shrn        v6.8b, v4.8h, #8
-    fetch_src_pixblock
-    shrn        v7.8b, v4.8h, #3
-    sli         v4.8h, v4.8h, #5
-        ushll       v14.8h, v17.8b, #7
-        sli         v14.8h, v14.8h, #1
-                                    PF add PF_X, PF_X, #8
-        ushll       v8.8h, v19.8b, #7
-        sli         v8.8h, v8.8h,  #1
-                                    PF tst PF_CTL, #0xF
-    sri         v6.8b, v6.8b, #5
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-10:
-    mvn         v3.8b, v3.8b
-                                    PF beq 10f
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-    sri         v7.8b, v7.8b, #6
-    shrn        v30.8b, v4.8h, #2
-    umull       v10.8h, v3.8b, v6.8b
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-    umull       v11.8h, v3.8b, v7.8b
-    umull       v12.8h, v3.8b, v30.8b
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-        sri         v14.8h, v8.8h, #5
-                                    PF cmp PF_X, ORIG_W
-        ushll       v9.8h, v18.8b, #7
-        sli         v9.8h, v9.8h, #1
-    urshr       v17.8h, v10.8h, #8
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    urshr       v19.8h, v11.8h, #8
-    urshr       v18.8h, v12.8h, #8
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-        sri         v14.8h, v9.8h, #11
-        mov         v28.d[0], v14.d[0]
-        mov         v29.d[0], v14.d[1]
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-    raddhn      v20.8b, v10.8h, v17.8h
-    raddhn      v23.8b, v11.8h, v19.8h
-                                    PF ble 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_SRC, #1
-10:
-    raddhn      v22.8b, v12.8h, v18.8h
-        st1         {v14.8h}, [DST_W], #16
-.endm
-
-#else
-
-/* If we did not care much about the performance, we would just use this... */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
-    pixman_composite_over_8888_0565_process_pixblock_tail
-    st1         {v14.8h}, [DST_W], #16
-    ld1         {v4.4h, v4.5h}, [DST_R], #16
-    fetch_src_pixblock
-    pixman_composite_over_8888_0565_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-#endif
-
-/*
- * And now the final part. We are using 'generate_composite_function' macro
- * to put all the stuff together. We are specifying the name of the function
- * which we want to get, number of bits per pixel for the source, mask and
- * destination (0 if unused, like mask in this case). Next come some bit
- * flags:
- *   FLAG_DST_READWRITE      - tells that the destination buffer is both read
- *                             and written, for write-only buffer we would use
- *                             FLAG_DST_WRITEONLY flag instead
- *   FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
- *                             and separate color channels for 32bpp format.
- * The next things are:
- *  - the number of pixels processed per iteration (8 in this case, because
- *    that's the maximum what can fit into four 64-bit NEON registers).
- *  - prefetch distance, measured in pixel blocks. In this case it is 5 times
- *    by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
- *    prefetch distance can be selected by running some benchmarks.
- *
- * After that we specify some macros, these are 'default_init',
- * 'default_cleanup' here which are empty (but it is possible to have custom
- * init/cleanup macros to be able to save/restore some extra NEON registers
- * like d8-d15 or do anything else) followed by
- * 'pixman_composite_over_8888_0565_process_pixblock_head',
- * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
- * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
- * which we got implemented above.
- *
- * The last part is the NEON registers allocation scheme.
- */
-generate_composite_function \
-    pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_0565_process_pixblock_head, \
-    pixman_composite_over_8888_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_0565_process_pixblock_head
-    /* convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format
-       and put data into v6 - red, v7 - green, v30 - blue */
-    mov         v4.d[1], v5.d[0]
-    shrn        v6.8b, v4.8h, #8
-    shrn        v7.8b, v4.8h, #3
-    sli         v4.8h, v4.8h, #5
-    sri         v6.8b, v6.8b, #5
-    sri         v7.8b, v7.8b, #6
-    shrn        v30.8b, v4.8h, #2
-    /* now do alpha blending, storing results in 8-bit planar format
-       into v20 - red, v23 - green, v22 - blue */
-    umull       v10.8h, v3.8b, v6.8b
-    umull       v11.8h, v3.8b, v7.8b
-    umull       v12.8h, v3.8b, v30.8b
-    urshr       v13.8h, v10.8h, #8
-    urshr       v14.8h, v11.8h, #8
-    urshr       v15.8h, v12.8h, #8
-    raddhn      v20.8b, v10.8h, v13.8h
-    raddhn      v23.8b, v11.8h, v14.8h
-    raddhn      v22.8b, v12.8h, v15.8h
-.endm
-
-.macro pixman_composite_over_n_0565_process_pixblock_tail
-    /* ... continue alpha blending */
-    uqadd       v17.8b, v2.8b, v20.8b
-    uqadd       v18.8b, v0.8b, v22.8b
-    uqadd       v19.8b, v1.8b, v23.8b
-    /* convert the result to r5g6b5 and store it into {v14} */
-    ushll       v14.8h, v17.8b, #7
-    sli         v14.8h, v14.8h, #1
-    ushll       v8.8h, v19.8b, #7
-    sli         v8.8h, v8.8h, #1
-    ushll       v9.8h, v18.8b, #7
-    sli         v9.8h, v9.8h, #1
-    sri         v14.8h, v8.8h, #5
-    sri         v14.8h, v9.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_0565_process_pixblock_tail_head
-    pixman_composite_over_n_0565_process_pixblock_tail
-    ld1         {v4.4h, v5.4h}, [DST_R], #16
-    st1         {v14.8h}, [DST_W], #16
-    pixman_composite_over_n_0565_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-.macro pixman_composite_over_n_0565_init
-    mov         v3.s[0], w4
-    dup         v0.8b, v3.b[0]
-    dup         v1.8b, v3.b[1]
-    dup         v2.8b, v3.b[2]
-    dup         v3.8b, v3.b[3]
-    mvn         v3.8b, v3.8b      /* invert source alpha */
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_0565_init, \
-    default_cleanup, \
-    pixman_composite_over_n_0565_process_pixblock_head, \
-    pixman_composite_over_n_0565_process_pixblock_tail, \
-    pixman_composite_over_n_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_0565_process_pixblock_head
-    ushll       v8.8h,  v1.8b,  #7
-    sli         v8.8h,  v8.8h,  #1
-    ushll       v14.8h, v2.8b,  #7
-    sli         v14.8h, v14.8h, #1
-    ushll       v9.8h,  v0.8b,  #7
-    sli         v9.8h,  v9.8h,  #1
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail
-    sri         v14.8h, v8.8h, #5
-    sri         v14.8h, v9.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
-        sri         v14.8h, v8.8h, #5
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-    fetch_src_pixblock
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        sri         v14.8h, v9.8h, #11
-        mov         v28.d[0], v14.d[0]
-        mov         v29.d[0], v14.d[1]
-                                    PF cmp PF_X, ORIG_W
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-    ushll       v8.8h, v1.8b, #7
-    sli         v8.8h, v8.8h, #1
-        st1        {v14.8h}, [DST_W], #16
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    ushll       v14.8h, v2.8b, #7
-    sli         v14.8h, v14.8h, #1
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-    ushll       v9.8h, v0.8b, #7
-    sli         v9.8h, v9.8h, #1
-.endm
-
-generate_composite_function \
-    pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_0565_process_pixblock_head, \
-    pixman_composite_src_8888_0565_process_pixblock_tail, \
-    pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_8888_process_pixblock_head
-    mov         v0.d[1], v1.d[0]
-    shrn        v30.8b, v0.8h, #8
-    shrn        v29.8b, v0.8h, #3
-    sli         v0.8h,  v0.8h, #5
-    movi        v31.8b, #255
-    sri         v30.8b, v30.8b, #5
-    sri         v29.8b, v29.8b, #6
-    shrn        v28.8b, v0.8h, #2
-.endm
-
-.macro pixman_composite_src_0565_8888_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
-    pixman_composite_src_0565_8888_process_pixblock_tail
-    st4         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    fetch_src_pixblock
-    pixman_composite_src_0565_8888_process_pixblock_head
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_8888_process_pixblock_head, \
-    pixman_composite_src_0565_8888_process_pixblock_tail, \
-    pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_process_pixblock_head
-    uqadd       v28.8b, v0.8b, v4.8b
-    uqadd       v29.8b, v1.8b, v5.8b
-    uqadd       v30.8b, v2.8b, v6.8b
-    uqadd       v31.8b, v3.8b, v7.8b
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail_head
-    fetch_src_pixblock
-                                    PF add PF_X, PF_X, #32
-                                    PF tst PF_CTL, #0xF
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #32
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        st1     {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF cmp PF_X, ORIG_W
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    uqadd       v28.8b, v0.8b, v4.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-    uqadd       v29.8b, v1.8b, v5.8b
-    uqadd       v30.8b, v2.8b, v6.8b
-    uqadd       v31.8b, v3.8b, v7.8b
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        st1     {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF cmp PF_X, ORIG_W
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    uqadd       v28.8b, v0.8b, v4.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-    uqadd       v29.8b, v1.8b, v5.8b
-    uqadd       v30.8b, v2.8b, v6.8b
-    uqadd       v31.8b, v3.8b, v7.8b
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head
-    mvn         v24.8b, v3.8b  /* get inverted alpha */
-    /* do alpha blending */
-    umull       v8.8h, v24.8b, v4.8b
-    umull       v9.8h, v24.8b, v5.8b
-    umull       v10.8h, v24.8b, v6.8b
-    umull       v11.8h, v24.8b, v7.8b
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail
-    urshr       v14.8h, v8.8h, #8
-    urshr       v15.8h, v9.8h, #8
-    urshr       v16.8h, v10.8h, #8
-    urshr       v17.8h, v11.8h, #8
-    raddhn      v28.8b, v14.8h, v8.8h
-    raddhn      v29.8b, v15.8h, v9.8h
-    raddhn      v30.8b, v16.8h, v10.8h
-    raddhn      v31.8b, v17.8h, v11.8h
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-     ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-        urshr       v14.8h, v8.8h, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        urshr       v15.8h, v9.8h, #8
-        urshr       v16.8h, v10.8h, #8
-        urshr       v17.8h, v11.8h, #8
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v28.8b, v14.8h, v8.8h
-        raddhn      v29.8b, v15.8h, v9.8h
-                                    PF cmp PF_X, ORIG_W
-        raddhn      v30.8b, v16.8h, v10.8h
-        raddhn      v31.8b, v17.8h, v11.8h
-    fetch_src_pixblock
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-    mvn         v22.8b, v3.8b
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-         st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull      v8.8h, v22.8b, v4.8b
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull      v9.8h, v22.8b, v5.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-    umull      v10.8h, v22.8b, v6.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-     umull     v11.8h, v22.8b, v7.8b
-.endm
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8888_process_pixblock_head
-    pixman_composite_out_reverse_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail
-    pixman_composite_out_reverse_8888_8888_process_pixblock_tail
-    uqadd       v28.8b, v0.8b, v28.8b
-    uqadd       v29.8b, v1.8b, v29.8b
-    uqadd       v30.8b, v2.8b, v30.8b
-    uqadd       v31.8b, v3.8b, v31.8b
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
-     ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-        urshr       v14.8h, v8.8h, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        urshr       v15.8h, v9.8h, #8
-        urshr       v16.8h, v10.8h, #8
-        urshr       v17.8h, v11.8h, #8
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v28.8b, v14.8h, v8.8h
-        raddhn      v29.8b, v15.8h, v9.8h
-                                    PF cmp PF_X, ORIG_W
-        raddhn      v30.8b, v16.8h, v10.8h
-        raddhn      v31.8b, v17.8h, v11.8h
-        uqadd       v28.8b, v0.8b, v28.8b
-        uqadd       v29.8b, v1.8b, v29.8b
-        uqadd       v30.8b, v2.8b, v30.8b
-        uqadd       v31.8b, v3.8b, v31.8b
-    fetch_src_pixblock
-                                    PF lsl DUMMY, PF_X, #src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-    mvn        v22.8b, v3.8b
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-         st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull      v8.8h, v22.8b, v4.8b
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull      v9.8h, v22.8b, v5.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-    umull      v10.8h, v22.8b, v6.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-    umull      v11.8h, v22.8b, v7.8b
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_process_pixblock_head
-    /* deinterleaved source pixels in {v0, v1, v2, v3} */
-    /* inverted alpha in {v24} */
-    /* destination pixels in {v4, v5, v6, v7} */
-    umull       v8.8h, v24.8b, v4.8b
-    umull       v9.8h, v24.8b, v5.8b
-    umull       v10.8h, v24.8b, v6.8b
-    umull       v11.8h, v24.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_n_8888_process_pixblock_tail
-    urshr       v14.8h, v8.8h, #8
-    urshr       v15.8h, v9.8h, #8
-    urshr       v16.8h, v10.8h, #8
-    urshr       v17.8h, v11.8h, #8
-    raddhn      v28.8b, v14.8h, v8.8h
-    raddhn      v29.8b, v15.8h, v9.8h
-    raddhn      v30.8b, v16.8h, v10.8h
-    raddhn      v31.8b, v17.8h, v11.8h
-    uqadd       v28.8b, v0.8b, v28.8b
-    uqadd       v29.8b, v1.8b, v29.8b
-    uqadd       v30.8b, v2.8b, v30.8b
-    uqadd       v31.8b, v3.8b, v31.8b
-.endm
-
-.macro pixman_composite_over_n_8888_process_pixblock_tail_head
-        urshr       v14.8h, v8.8h, #8
-        urshr       v15.8h, v9.8h, #8
-        urshr       v16.8h, v10.8h, #8
-        urshr       v17.8h, v11.8h, #8
-        raddhn      v28.8b, v14.8h, v8.8h
-        raddhn      v29.8b, v15.8h, v9.8h
-        raddhn      v30.8b, v16.8h, v10.8h
-        raddhn      v31.8b, v17.8h, v11.8h
-    ld4         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-        uqadd       v28.8b, v0.8b, v28.8b
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0x0F
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        uqadd       v29.8b, v1.8b, v29.8b
-        uqadd       v30.8b, v2.8b, v30.8b
-        uqadd       v31.8b, v3.8b, v31.8b
-                                    PF cmp PF_X, ORIG_W
-    umull       v8.8h, v24.8b, v4.8b
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-    umull       v9.8h, v24.8b, v5.8b
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull       v10.8h, v24.8b, v6.8b
-                                    PF subs PF_CTL, PF_CTL, #0x10
-    umull       v11.8h, v24.8b, v7.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-        st4         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-.macro pixman_composite_over_n_8888_init
-    mov         v3.s[0], w4
-    dup         v0.8b, v3.b[0]
-    dup         v1.8b, v3.b[1]
-    dup         v2.8b, v3.b[2]
-    dup         v3.8b, v3.b[3]
-    mvn         v24.8b, v3.8b  /* get inverted alpha */
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
-        urshr       v14.8h, v8.8h, #8
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-        urshr       v15.8h, v9.8h, #8
-        urshr       v12.8h, v10.8h, #8
-        urshr       v13.8h, v11.8h, #8
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v28.8b, v14.8h, v8.8h
-        raddhn      v29.8b, v15.8h, v9.8h
-                                    PF cmp PF_X, ORIG_W
-        raddhn      v30.8b, v12.8h, v10.8h
-        raddhn      v31.8b, v13.8h, v11.8h
-        uqadd       v28.8b, v0.8b, v28.8b
-        uqadd       v29.8b, v1.8b, v29.8b
-        uqadd       v30.8b, v2.8b, v30.8b
-        uqadd       v31.8b, v3.8b, v31.8b
-    ld4         {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_R], #32
-    mvn         v22.8b, v3.8b
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-        st4         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF blt 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull       v8.8h, v22.8b, v4.8b
-                                    PF blt 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull       v9.8h, v22.8b, v5.8b
-    umull       v10.8h, v22.8b, v6.8b
-                                    PF blt 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-    umull       v11.8h, v22.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_reverse_n_8888_init
-    mov         v7.s[0], w4
-    dup         v4.8b, v7.b[0]
-    dup         v5.8b, v7.b[1]
-    dup         v6.8b, v7.b[2]
-    dup         v7.8b, v7.b[3]
-.endm
-
-generate_composite_function \
-    pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_reverse_n_8888_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0,  /* dst_r_basereg */ \
-    4,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_head
-    umull       v0.8h,  v24.8b, v8.8b    /* IN for SRC pixels (part1) */
-    umull       v1.8h,  v24.8b, v9.8b
-    umull       v2.8h,  v24.8b, v10.8b
-    umull       v3.8h,  v24.8b, v11.8b
-        mov         v4.d[1], v5.d[0]
-        shrn        v25.8b,  v4.8h, #8 /* convert DST_R data to 32-bpp (part1) */
-        shrn        v26.8b,  v4.8h, #3
-        sli         v4.8h,   v4.8h, #5
-    urshr       v17.8h, v0.8h,  #8    /* IN for SRC pixels (part2) */
-    urshr       v18.8h, v1.8h,  #8
-    urshr       v19.8h, v2.8h,  #8
-    urshr       v20.8h, v3.8h,  #8
-    raddhn      v0.8b,  v0.8h,  v17.8h
-    raddhn      v1.8b,  v1.8h,  v18.8h
-    raddhn      v2.8b,  v2.8h,  v19.8h
-    raddhn      v3.8b,  v3.8h,  v20.8h
-        sri         v25.8b, v25.8b, #5 /* convert DST_R data to 32-bpp (part2) */
-        sri         v26.8b, v26.8b, #6
-    mvn         v3.8b,  v3.8b
-        shrn        v30.8b, v4.8h,  #2
-    umull       v18.8h, v3.8b, v25.8b     /* now do alpha blending */
-    umull       v19.8h, v3.8b, v26.8b
-    umull       v20.8h, v3.8b, v30.8b
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
-    /* 3 cycle bubble (after vmull.u8) */
-    urshr       v5.8h, v18.8h, #8
-    urshr       v6.8h, v19.8h, #8
-    urshr       v7.8h, v20.8h, #8
-    raddhn      v17.8b, v18.8h, v5.8h
-    raddhn      v19.8b, v19.8h, v6.8h
-    raddhn      v18.8b, v20.8h, v7.8h
-    uqadd       v5.8b, v2.8b,  v17.8b
-    /* 1 cycle bubble */
-    uqadd       v6.8b, v0.8b,  v18.8b
-    uqadd       v7.8b, v1.8b,  v19.8b
-    ushll       v14.8h, v5.8b, #7    /* convert to 16bpp */
-    sli         v14.8h, v14.8h, #1
-    ushll       v18.8h, v7.8b, #7
-    sli         v18.8h, v18.8h, #1
-    ushll       v19.8h, v6.8b, #7
-    sli         v19.8h, v19.8h, #1
-    sri         v14.8h, v18.8h, #5
-    /* 1 cycle bubble */
-    sri         v14.8h, v19.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-#if 0
-    ld1         {v4.8h}, [DST_R], #16
-    shrn        v25.8b,  v4.8h,  #8
-    fetch_mask_pixblock
-    shrn        v26.8b,  v4.8h,  #3
-    fetch_src_pixblock
-    umull       v22.8h,  v24.8b, v10.8b
-        urshr       v13.8h, v18.8h, #8
-        urshr       v11.8h, v19.8h, #8
-        urshr       v15.8h, v20.8h, #8
-        raddhn      v17.8b, v18.8h, v13.8h
-        raddhn      v19.8b, v19.8h, v11.8h
-        raddhn      v18.8b, v20.8h, v15.8h
-        uqadd       v17.8b, v2.8b, v17.8b
-    umull       v21.8h,  v24.8b, v9.8b
-        uqadd       v18.8b, v0.8b, v18.8b
-        uqadd       v19.8b, v1.8b, v19.8b
-        ushll       v14.8h, v17.8b, #7
-        sli         v14.8h, v14.8h, #1
-    umull       v20.8h,  v24.8b, v8.8b
-        ushll       v18.8h,  v18.8b, #7
-        sli         v18.8h,  v18.8h, #1
-        ushll       v19.8h,  v19.8b, #7
-        sli         v19.8h,  v19.8h, #1
-        sri         v14.8h,  v18.8h, #5
-    umull       v23.8h,  v24.8b, v11.8b
-        sri         v14.8h,  v19.8h, #11
-        mov         v28.d[0], v14.d[0]
-        mov         v29.d[0], v14.d[1]
-
-    cache_preload 8, 8
-
-    sli         v4.8h,  v4.8h,   #5
-    urshr       v16.8h, v20.8h,  #8
-    urshr       v17.8h, v21.8h,  #8
-    urshr       v18.8h, v22.8h,  #8
-    urshr       v19.8h, v23.8h,  #8
-    raddhn      v0.8b,  v20.8h, v16.8h
-    raddhn      v1.8b,  v21.8h, v17.8h
-    raddhn      v2.8b,  v22.8h, v18.8h
-    raddhn      v3.8b,  v23.8h, v19.8h
-    sri         v25.8b,  v25.8b,  #5
-    sri         v26.8b,  v26.8b,  #6
-    mvn         v3.8b,  v3.8b
-    shrn        v30.8b, v4.8h,  #2
-    st1         {v14.8h}, [DST_W], #16
-    umull       v18.8h, v3.8b, v25.8b
-    umull       v19.8h, v3.8b, v26.8b
-    umull       v20.8h, v3.8b, v30.8b
-#else
-    pixman_composite_over_8888_8_0565_process_pixblock_tail
-    st1         {v28.4h, v29.4h}, [DST_W], #16
-    ld1         {v4.4h, v5.4h}, [DST_R], #16
-    fetch_mask_pixblock
-    fetch_src_pixblock
-    pixman_composite_over_8888_8_0565_process_pixblock_head
-#endif
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-/*
- * This function needs a special initialization of solid mask.
- * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
- * offset, split into color components and replicated in d8-d11
- * registers. Additionally, this function needs all the NEON registers,
- * so it has to save d8-d15 registers which are callee saved according
- * to ABI. These registers are restored from 'cleanup' macro. All the
- * other NEON registers are caller saved, so can be clobbered freely
- * without introducing any problems.
- */
-.macro pixman_composite_over_n_8_0565_init
-    mov         v11.s[0], w4
-    dup         v8.8b, v11.b[0]
-    dup         v9.8b, v11.b[1]
-    dup         v10.8b, v11.b[2]
-    dup         v11.8b, v11.b[3]
-.endm
-
-.macro pixman_composite_over_n_8_0565_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_0565_init, \
-    pixman_composite_over_n_8_0565_cleanup, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_0565_init
-    mov         v24.s[0], w6
-    dup         v24.8b, v24.b[3]
-.endm
-
-.macro pixman_composite_over_8888_n_0565_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_8888_n_0565_init, \
-    pixman_composite_over_8888_n_0565_cleanup, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
-    st1     {v0.4h, v1.4h, v2.4h, v3.4h}, [DST_W], #32
-    fetch_src_pixblock
-    cache_preload 16, 16
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    16, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_0565_process_pixblock_head, \
-    pixman_composite_src_0565_0565_process_pixblock_tail, \
-    pixman_composite_src_0565_0565_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail_head
-    st1         {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_W], 32
-.endm
-
-.macro pixman_composite_src_n_8_init
-    mov         v0.s[0], w4
-    dup         v3.8b, v0.b[0]
-    dup         v2.8b, v0.b[0]
-    dup         v1.8b, v0.b[0]
-    dup         v0.8b, v0.b[0]
-.endm
-
-.macro pixman_composite_src_n_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
-    FLAG_DST_WRITEONLY, \
-    32, /* number of pixels, processed in a single block */ \
-    0,  /* prefetch distance */ \
-    pixman_composite_src_n_8_init, \
-    pixman_composite_src_n_8_cleanup, \
-    pixman_composite_src_n_8_process_pixblock_head, \
-    pixman_composite_src_n_8_process_pixblock_tail, \
-    pixman_composite_src_n_8_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail_head
-    st1     {v0.4h, v1.4h, v2.4h, v3.4h}, [DST_W], #32
-.endm
-
-.macro pixman_composite_src_n_0565_init
-    mov         v0.s[0], w4
-    dup         v3.4h, v0.h[0]
-    dup         v2.4h, v0.h[0]
-    dup         v1.4h, v0.h[0]
-    dup         v0.4h, v0.h[0]
-.endm
-
-.macro pixman_composite_src_n_0565_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    16, /* number of pixels, processed in a single block */ \
-    0,  /* prefetch distance */ \
-    pixman_composite_src_n_0565_init, \
-    pixman_composite_src_n_0565_cleanup, \
-    pixman_composite_src_n_0565_process_pixblock_head, \
-    pixman_composite_src_n_0565_process_pixblock_tail, \
-    pixman_composite_src_n_0565_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail_head
-    st1         {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32
-.endm
-
-.macro pixman_composite_src_n_8888_init
-    mov         v0.s[0], w4
-    dup         v3.2s, v0.s[0]
-    dup         v2.2s, v0.s[0]
-    dup         v1.2s, v0.s[0]
-    dup         v0.2s, v0.s[0]
-.endm
-
-.macro pixman_composite_src_n_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    0, /* prefetch distance */ \
-    pixman_composite_src_n_8888_init, \
-    pixman_composite_src_n_8888_cleanup, \
-    pixman_composite_src_n_8888_process_pixblock_head, \
-    pixman_composite_src_n_8888_process_pixblock_tail, \
-    pixman_composite_src_n_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
-    st1  {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32
-    fetch_src_pixblock
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_8888_process_pixblock_head, \
-    pixman_composite_src_8888_8888_process_pixblock_tail, \
-    pixman_composite_src_8888_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_x888_8888_process_pixblock_head
-    orr      v0.8b, v0.8b, v4.8b
-    orr      v1.8b, v1.8b, v4.8b
-    orr      v2.8b, v2.8b, v4.8b
-    orr      v3.8b, v3.8b, v4.8b
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
-    st1      {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32
-    fetch_src_pixblock
-    orr      v0.8b, v0.8b, v4.8b
-    orr      v1.8b, v1.8b, v4.8b
-    orr      v2.8b, v2.8b, v4.8b
-    orr      v3.8b, v3.8b, v4.8b
-    cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_x888_8888_init
-    movi    v4.2s, #0xff, lsl 24
-.endm
-
-generate_composite_function \
-    pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    pixman_composite_src_x888_8888_init, \
-    default_cleanup, \
-    pixman_composite_src_x888_8888_process_pixblock_head, \
-    pixman_composite_src_x888_8888_process_pixblock_tail, \
-    pixman_composite_src_x888_8888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_head
-    /* expecting solid source in {v0, v1, v2, v3} */
-    /* mask is in v24 (v25, v26, v27 are unused) */
-
-    /* in */
-    umull       v8.8h,  v24.8b, v0.8b
-    umull       v9.8h,  v24.8b, v1.8b
-    umull       v10.8h, v24.8b, v2.8b
-    umull       v11.8h, v24.8b, v3.8b
-    ursra       v8.8h,  v8.8h, #8
-    ursra       v9.8h,  v9.8h, #8
-    ursra       v10.8h, v10.8h, #8
-    ursra       v11.8h, v11.8h, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_tail
-    rshrn       v28.8b, v8.8h, #8
-    rshrn       v29.8b, v9.8h, #8
-    rshrn       v30.8b, v10.8h, #8
-    rshrn       v31.8b, v11.8h, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head
-    fetch_mask_pixblock
-                                    PF add PF_X, PF_X, #8
-        rshrn       v28.8b, v8.8h, #8
-                                    PF tst PF_CTL, #0x0F
-        rshrn       v29.8b, v9.8h, #8
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-10:
-        rshrn      v30.8b, v10.8h, #8
-                                    PF beq 10f
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        rshrn      v31.8b, v11.8h, #8
-                                    PF cmp PF_X, ORIG_W
-    umull          v8.8h, v24.8b, v0.8b
-                                    PF lsl DUMMY, PF_X, #mask_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
-    umull          v9.8h, v24.8b, v1.8b
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull          v10.8h, v24.8b, v2.8b
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull          v11.8h, v24.8b, v3.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
-                                    PF ldrsb DUMMY, [PF_MASK, DUMMY]
-                                    PF add PF_MASK, PF_MASK, #1
-10:
-        st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    ursra       v8.8h, v8.8h, #8
-    ursra       v9.8h, v9.8h, #8
-    ursra       v10.8h, v10.8h, #8
-    ursra       v11.8h, v11.8h, #8
-.endm
-
-.macro pixman_composite_src_n_8_8888_init
-    mov         v3.s[0], w4
-    dup         v0.8b, v3.b[0]
-    dup         v1.8b, v3.b[1]
-    dup         v2.8b, v3.b[2]
-    dup         v3.8b, v3.b[3]
-.endm
-
-.macro pixman_composite_src_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_src_n_8_8888_init, \
-    pixman_composite_src_n_8_8888_cleanup, \
-    pixman_composite_src_n_8_8888_process_pixblock_head, \
-    pixman_composite_src_n_8_8888_process_pixblock_tail, \
-    pixman_composite_src_n_8_8888_process_pixblock_tail_head, \
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_8_process_pixblock_head
-    umull       v0.8h, v24.8b, v16.8b
-    umull       v1.8h, v25.8b, v16.8b
-    umull       v2.8h, v26.8b, v16.8b
-    umull       v3.8h, v27.8b, v16.8b
-    ursra       v0.8h, v0.8h,  #8
-    ursra       v1.8h, v1.8h,  #8
-    ursra       v2.8h, v2.8h,  #8
-    ursra       v3.8h, v3.8h,  #8
-.endm
-
-.macro pixman_composite_src_n_8_8_process_pixblock_tail
-    rshrn       v28.8b, v0.8h, #8
-    rshrn       v29.8b, v1.8h, #8
-    rshrn       v30.8b, v2.8h, #8
-    rshrn       v31.8b, v3.8h, #8
-.endm
-
-.macro pixman_composite_src_n_8_8_process_pixblock_tail_head
-    fetch_mask_pixblock
-                                    PF add PF_X, PF_X, #8
-        rshrn       v28.8b, v0.8h, #8
-                                    PF tst PF_CTL, #0x0F
-        rshrn       v29.8b, v1.8h, #8
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-10:
-        rshrn       v30.8b, v2.8h, #8
-                                    PF beq 10f
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        rshrn       v31.8b, v3.8h, #8
-                                    PF cmp PF_X, ORIG_W
-    umull       v0.8h,  v24.8b, v16.8b
-                                    PF lsl DUMMY, PF_X, mask_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
-    umull       v1.8h,  v25.8b, v16.8b
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull       v2.8h,  v26.8b, v16.8b
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull       v3.8h,  v27.8b, v16.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
-                                    PF ldrsb DUMMY, [PF_MASK, DUMMY]
-                                    PF add PF_MASK, PF_MASK, #1
-10:
-        st1         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    ursra       v0.8h, v0.8h,  #8
-    ursra       v1.8h, v1.8h,  #8
-    ursra       v2.8h, v2.8h,  #8
-    ursra       v3.8h, v3.8h,  #8
-.endm
-
-.macro pixman_composite_src_n_8_8_init
-    mov         v16.s[0], w4
-    dup         v16.8b, v16.b[3]
-.endm
-
-.macro pixman_composite_src_n_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_WRITEONLY, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_src_n_8_8_init, \
-    pixman_composite_src_n_8_8_cleanup, \
-    pixman_composite_src_n_8_8_process_pixblock_head, \
-    pixman_composite_src_n_8_8_process_pixblock_tail, \
-    pixman_composite_src_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_head
-    /* expecting deinterleaved source data in {v8, v9, v10, v11} */
-    /* v8 - blue, v9 - green, v10 - red, v11 - alpha */
-    /* and destination data in {v4, v5, v6, v7} */
-    /* mask is in v24 (v25, v26, v27 are unused) */
-
-    /* in */
-    umull       v12.8h, v24.8b, v8.8b
-    umull       v13.8h, v24.8b, v9.8b
-    umull       v14.8h, v24.8b, v10.8b
-    umull       v15.8h, v24.8b, v11.8b
-    urshr       v16.8h, v12.8h, #8
-    urshr       v17.8h, v13.8h, #8
-    urshr       v18.8h, v14.8h, #8
-    urshr       v19.8h, v15.8h, #8
-    raddhn      v0.8b, v12.8h, v16.8h
-    raddhn      v1.8b, v13.8h, v17.8h
-    raddhn      v2.8b, v14.8h, v18.8h
-    raddhn      v3.8b, v15.8h, v19.8h
-    mvn         v25.8b, v3.8b  /* get inverted alpha */
-    /* source:      v0 - blue, v1 - green, v2 - red, v3 - alpha */
-    /* destination: v4 - blue, v5 - green, v6 - red, v7 - alpha */
-    /* now do alpha blending */
-    umull       v12.8h, v25.8b, v4.8b
-    umull       v13.8h, v25.8b, v5.8b
-    umull       v14.8h, v25.8b, v6.8b
-    umull       v15.8h, v25.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail
-    urshr       v16.8h, v12.8h, #8
-    urshr       v17.8h, v13.8h, #8
-    urshr       v18.8h, v14.8h, #8
-    urshr       v19.8h, v15.8h, #8
-    raddhn      v28.8b, v16.8h, v12.8h
-    raddhn      v29.8b, v17.8h, v13.8h
-    raddhn      v30.8b, v18.8h, v14.8h
-    raddhn      v31.8b, v19.8h, v15.8h
-    uqadd       v28.8b, v0.8b, v28.8b
-    uqadd       v29.8b, v1.8b, v29.8b
-    uqadd       v30.8b, v2.8b, v30.8b
-    uqadd       v31.8b, v3.8b, v31.8b
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
-        urshr       v16.8h, v12.8h, #8
-     ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-        urshr       v17.8h, v13.8h, #8
-    fetch_mask_pixblock
-        urshr       v18.8h, v14.8h, #8
-                                    PF add PF_X, PF_X, #8
-        urshr       v19.8h, v15.8h, #8
-                                    PF tst PF_CTL, #0x0F
-        raddhn      v28.8b, v16.8h, v12.8h
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-10:
-        raddhn      v29.8b, v17.8h, v13.8h
-                                    PF beq 10f
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v30.8b, v18.8h, v14.8h
-                                    PF cmp PF_X, ORIG_W
-        raddhn      v31.8b, v19.8h, v15.8h
-                                    PF lsl DUMMY, PF_X, #dst_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-    umull       v16.8h, v24.8b, v8.8b
-                                    PF lsl DUMMY, PF_X, #mask_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
-    umull       v17.8h, v24.8b, v9.8b
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-10:
-    umull       v18.8h, v24.8b, v10.8b
-                                    PF ble 10f
-                                    PF subs PF_CTL, PF_CTL, #0x10
-10:
-    umull       v19.8h, v24.8b, v11.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-                                    PF ldrsb DUMMY, [PF_DST, DUMMY]
-                                    PF add PF_DST, PF_DST, #1
-10:
-        uqadd       v28.8b, v0.8b, v28.8b
-                                    PF ble 10f
-                                    PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
-                                    PF ldrsb DUMMY, [PF_MASK, DUMMY]
-                                    PF add PF_MASK, PF_MASK, #1
-10:
-        uqadd        v29.8b, v1.8b, v29.8b
-        uqadd        v30.8b, v2.8b, v30.8b
-        uqadd        v31.8b, v3.8b, v31.8b
-    urshr       v12.8h, v16.8h, #8
-    urshr       v13.8h, v17.8h, #8
-    urshr       v14.8h, v18.8h, #8
-    urshr       v15.8h, v19.8h, #8
-    raddhn      v0.8b, v16.8h, v12.8h
-    raddhn      v1.8b, v17.8h, v13.8h
-    raddhn      v2.8b, v18.8h, v14.8h
-    raddhn      v3.8b, v19.8h, v15.8h
-        st4          {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    mvn         v25.8b, v3.8b
-    umull       v12.8h, v25.8b, v4.8b
-    umull       v13.8h, v25.8b, v5.8b
-    umull       v14.8h, v25.8b, v6.8b
-    umull       v15.8h, v25.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_n_8_8888_init
-    mov         v11.s[0], w4
-    dup         v8.8b, v11.b[0]
-    dup         v9.8b, v11.b[1]
-    dup         v10.8b, v11.b[2]
-    dup         v11.8b, v11.b[3]
-.endm
-
-.macro pixman_composite_over_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_8888_init, \
-    pixman_composite_over_n_8_8888_cleanup, \
-    pixman_composite_over_n_8_8888_process_pixblock_head, \
-    pixman_composite_over_n_8_8888_process_pixblock_tail, \
-    pixman_composite_over_n_8_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8_process_pixblock_head
-    umull       v0.8h,  v24.8b, v8.8b
-    umull       v1.8h,  v25.8b, v8.8b
-    umull       v2.8h,  v26.8b, v8.8b
-    umull       v3.8h,  v27.8b, v8.8b
-    urshr       v10.8h, v0.8h,  #8
-    urshr       v11.8h, v1.8h,  #8
-    urshr       v12.8h, v2.8h,  #8
-    urshr       v13.8h, v3.8h,  #8
-    raddhn      v0.8b,  v0.8h,  v10.8h
-    raddhn      v1.8b,  v1.8h,  v11.8h
-    raddhn      v2.8b,  v2.8h,  v12.8h
-    raddhn      v3.8b,  v3.8h,  v13.8h
-    mvn         v24.8b, v0.8b
-    mvn         v25.8b, v1.8b
-    mvn         v26.8b, v2.8b
-    mvn         v27.8b, v3.8b
-    umull       v10.8h, v24.8b, v4.8b
-    umull       v11.8h, v25.8b, v5.8b
-    umull       v12.8h, v26.8b, v6.8b
-    umull       v13.8h, v27.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_n_8_8_process_pixblock_tail
-    urshr       v14.8h, v10.8h,  #8
-    urshr       v15.8h, v11.8h,  #8
-    urshr       v16.8h, v12.8h, #8
-    urshr       v17.8h, v13.8h, #8
-    raddhn      v28.8b, v14.8h, v10.8h
-    raddhn      v29.8b, v15.8h, v11.8h
-    raddhn      v30.8b, v16.8h, v12.8h
-    raddhn      v31.8b, v17.8h, v13.8h
-    uqadd       v28.8b, v0.8b,  v28.8b
-    uqadd       v29.8b, v1.8b,  v29.8b
-    uqadd       v30.8b, v2.8b,  v30.8b
-    uqadd       v31.8b, v3.8b,  v31.8b
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    pixman_composite_over_n_8_8_process_pixblock_tail
-    fetch_mask_pixblock
-    cache_preload 32, 32
-    st1         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    pixman_composite_over_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8_init
-    mov         v8.s[0], w4
-    dup         v8.8b, v8.b[3]
-.endm
-
-.macro pixman_composite_over_n_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8_8_init, \
-    pixman_composite_over_n_8_8_cleanup, \
-    pixman_composite_over_n_8_8_process_pixblock_head, \
-    pixman_composite_over_n_8_8_process_pixblock_tail, \
-    pixman_composite_over_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
-    /*
-     * 'combine_mask_ca' replacement
-     *
-     * input:  solid src (n) in {v8,  v9,  v10, v11}
-     *         dest in          {v4,  v5,  v6,  v7 }
-     *         mask in          {v24, v25, v26, v27}
-     * output: updated src in   {v0,  v1,  v2,  v3 }
-     *         updated mask in  {v24, v25, v26, v3 }
-     */
-    umull       v0.8h,  v24.8b, v8.8b
-    umull       v1.8h,  v25.8b, v9.8b
-    umull       v2.8h,  v26.8b, v10.8b
-    umull       v3.8h,  v27.8b, v11.8b
-    umull       v12.8h, v11.8b, v25.8b
-    umull       v13.8h, v11.8b, v24.8b
-    umull       v14.8h, v11.8b, v26.8b
-    urshr       v15.8h, v0.8h,  #8
-    urshr       v16.8h, v1.8h,  #8
-    urshr       v17.8h, v2.8h,  #8
-    raddhn      v0.8b,  v0.8h,  v15.8h
-    raddhn      v1.8b,  v1.8h,  v16.8h
-    raddhn      v2.8b,  v2.8h,  v17.8h
-    urshr       v15.8h, v13.8h, #8
-    urshr       v16.8h, v12.8h, #8
-    urshr       v17.8h, v14.8h, #8
-    urshr       v18.8h, v3.8h,  #8
-    raddhn      v24.8b, v13.8h, v15.8h
-    raddhn      v25.8b, v12.8h, v16.8h
-    raddhn      v26.8b, v14.8h, v17.8h
-    raddhn      v3.8b,  v3.8h,  v18.8h
-    /*
-     * 'combine_over_ca' replacement
-     *
-     * output: updated dest in {v28, v29, v30, v31}
-     */
-    mvn         v24.8b, v24.8b
-    mvn         v25.8b, v25.8b
-    mvn         v26.8b, v26.8b
-    mvn         v27.8b, v3.8b
-    umull       v12.8h, v24.8b, v4.8b
-    umull       v13.8h, v25.8b, v5.8b
-    umull       v14.8h, v26.8b, v6.8b
-    umull       v15.8h, v27.8b, v7.8b
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
-    /* ... continue 'combine_over_ca' replacement */
-    urshr       v16.8h, v12.8h, #8
-    urshr       v17.8h, v13.8h, #8
-    urshr       v18.8h, v14.8h, #8
-    urshr       v19.8h, v15.8h, #8
-    raddhn      v28.8b, v16.8h, v12.8h
-    raddhn      v29.8b, v17.8h, v13.8h
-    raddhn      v30.8b, v18.8h, v14.8h
-    raddhn      v31.8b, v19.8h, v15.8h
-    uqadd       v28.8b, v0.8b,  v28.8b
-    uqadd       v29.8b, v1.8b,  v29.8b
-    uqadd       v30.8b, v2.8b,  v30.8b
-    uqadd       v31.8b, v3.8b,  v31.8b
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-        urshr       v16.8h, v12.8h, #8
-        urshr       v17.8h, v13.8h, #8
-    ld4         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-        urshr       v18.8h, v14.8h, #8
-        urshr       v19.8h, v15.8h, #8
-        raddhn      v28.8b, v16.8h, v12.8h
-        raddhn      v29.8b, v17.8h, v13.8h
-        raddhn      v30.8b, v18.8h, v14.8h
-        raddhn      v31.8b, v19.8h, v15.8h
-    fetch_mask_pixblock
-        uqadd       v28.8b, v0.8b, v28.8b
-        uqadd       v29.8b, v1.8b, v29.8b
-        uqadd       v30.8b, v2.8b, v30.8b
-        uqadd       v31.8b, v3.8b, v31.8b
-    cache_preload 8, 8
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_head
-    st4         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_init
-    mov         v13.s[0], w4
-    dup         v8.8b, v13.b[0]
-    dup         v9.8b, v13.b[1]
-    dup         v10.8b, v13.b[2]
-    dup         v11.8b, v13.b[3]
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_8888_ca_init, \
-    pixman_composite_over_n_8888_8888_ca_cleanup, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
-    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head
-    /*
-     * 'combine_mask_ca' replacement
-     *
-     * input:  solid src (n) in {v8,  v9,  v10, v11}  [B, G, R, A]
-     *         mask in          {v24, v25, v26}       [B, G, R]
-     * output: updated src in   {v0,  v1,  v2 }       [B, G, R]
-     *         updated mask in  {v24, v25, v26}       [B, G, R]
-     */
-    umull       v0.8h,  v24.8b, v8.8b
-    umull       v1.8h,  v25.8b, v9.8b
-    umull       v2.8h,  v26.8b, v10.8b
-    umull       v12.8h, v11.8b, v24.8b
-    umull       v13.8h, v11.8b, v25.8b
-    umull       v14.8h, v11.8b, v26.8b
-    urshr       v15.8h, v0.8h,  #8
-    urshr       v16.8h, v1.8h,  #8
-    urshr       v17.8h, v2.8h,  #8
-    raddhn      v0.8b,  v0.8h,  v15.8h
-    raddhn      v1.8b,  v1.8h,  v16.8h
-    raddhn      v2.8b,  v2.8h,  v17.8h
-    urshr       v19.8h, v12.8h, #8
-    urshr       v20.8h, v13.8h, #8
-    urshr       v21.8h, v14.8h, #8
-    raddhn      v24.8b, v12.8h, v19.8h
-    raddhn      v25.8b, v13.8h, v20.8h
-    /*
-     * convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format
-     * and put data into v16 - blue, v17 - green, v18 - red
-     */
-       mov         v4.d[1], v5.d[0]
-       shrn        v17.8b, v4.8h,  #3
-       shrn        v18.8b, v4.8h,  #8
-    raddhn      v26.8b, v14.8h, v21.8h
-       sli         v4.8h,  v4.8h,  #5
-       sri         v18.8b, v18.8b, #5
-       sri         v17.8b, v17.8b, #6
-    /*
-     * 'combine_over_ca' replacement
-     *
-     * output: updated dest in v16 - blue, v17 - green, v18 - red
-     */
-    mvn         v24.8b, v24.8b
-    mvn         v25.8b, v25.8b
-       shrn       v16.8b, v4.8h,  #2
-    mvn         v26.8b, v26.8b
-    umull       v5.8h, v16.8b, v24.8b
-    umull       v6.8h, v17.8b, v25.8b
-    umull       v7.8h, v18.8b, v26.8b
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail
-    /* ... continue 'combine_over_ca' replacement */
-    urshr       v13.8h, v5.8h, #8
-    urshr       v14.8h, v6.8h, #8
-    urshr       v15.8h, v7.8h, #8
-    raddhn      v16.8b, v13.8h, v5.8h
-    raddhn      v17.8b, v14.8h, v6.8h
-    raddhn      v18.8b, v15.8h, v7.8h
-    uqadd       v16.8b, v0.8b, v16.8b
-    uqadd       v17.8b, v1.8b, v17.8b
-    uqadd       v18.8b, v2.8b, v18.8b
-    /*
-     * convert the results in v16, v17, v18 to r5g6b5 and store
-     * them into {v14}
-     */
-    ushll       v14.8h, v18.8b, #7
-    sli         v14.8h, v14.8h, #1
-    ushll       v12.8h, v17.8b, #7
-    sli         v12.8h, v12.8h, #1
-    ushll       v13.8h, v16.8b, #7
-    sli         v13.8h, v13.8h, #1
-    sri         v14.8h, v12.8h, #5
-    sri         v14.8h, v13.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
-    fetch_mask_pixblock
-        urshr       v13.8h, v5.8h, #8
-        urshr       v14.8h, v6.8h, #8
-    ld1         {v4.8h}, [DST_R], #16
-        urshr       v15.8h, v7.8h, #8
-        raddhn      v16.8b, v13.8h, v5.8h
-        raddhn      v17.8b, v14.8h, v6.8h
-        raddhn      v18.8b, v15.8h, v7.8h
-    mov         v5.d[0], v4.d[1]
-            /* process_pixblock_head */
-            /*
-             * 'combine_mask_ca' replacement
-             *
-             * input:  solid src (n) in {v8,  v9,  v10, v11}  [B, G, R, A]
-             *         mask in          {v24, v25, v26}       [B, G, R]
-             * output: updated src in   {v0,  v1,  v2 }       [B, G, R]
-             *         updated mask in  {v24, v25, v26}       [B, G, R]
-             */
-        uqadd       v16.8b, v0.8b, v16.8b
-        uqadd       v17.8b, v1.8b, v17.8b
-        uqadd       v18.8b, v2.8b, v18.8b
-            umull       v0.8h,  v24.8b, v8.8b
-            umull       v1.8h,  v25.8b, v9.8b
-            umull       v2.8h,  v26.8b, v10.8b
-        /*
-         * convert the result in v16, v17, v18 to r5g6b5 and store
-         * it into {v14}
-         */
-        ushll       v14.8h, v18.8b, #7
-        sli         v14.8h, v14.8h, #1
-        ushll       v18.8h, v16.8b, #7
-        sli         v18.8h, v18.8h, #1
-        ushll       v19.8h, v17.8b, #7
-        sli         v19.8h, v19.8h, #1
-            umull       v12.8h, v11.8b, v24.8b
-        sri         v14.8h, v19.8h, #5
-            umull       v13.8h, v11.8b, v25.8b
-            umull       v15.8h, v11.8b, v26.8b
-        sri         v14.8h, v18.8h, #11
-        mov         v28.d[0], v14.d[0]
-        mov         v29.d[0], v14.d[1]
-    cache_preload 8, 8
-            urshr       v16.8h, v0.8h,  #8
-            urshr       v17.8h, v1.8h,  #8
-            urshr       v18.8h, v2.8h,  #8
-            raddhn      v0.8b,  v0.8h,  v16.8h
-            raddhn      v1.8b,  v1.8h,  v17.8h
-            raddhn      v2.8b,  v2.8h,  v18.8h
-            urshr       v19.8h, v12.8h, #8
-            urshr       v20.8h, v13.8h, #8
-            urshr       v21.8h, v15.8h, #8
-            raddhn      v24.8b, v12.8h, v19.8h
-            raddhn      v25.8b, v13.8h, v20.8h
-                /*
-                 * convert 8 r5g6b5 pixel data from {v4, v5} to planar
-             * 8-bit format and put data into v16 - blue, v17 - green,
-             * v18 - red
-                 */
-		mov         v4.d[1], v5.d[0]
-                shrn        v17.8b, v4.8h,  #3
-                shrn        v18.8b, v4.8h,  #8
-            raddhn      v26.8b, v15.8h, v21.8h
-                sli         v4.8h,  v4.8h,  #5
-                sri         v17.8b, v17.8b, #6
-                sri         v18.8b, v18.8b, #5
-            /*
-             * 'combine_over_ca' replacement
-             *
-             * output: updated dest in v16 - blue, v17 - green, v18 - red
-             */
-            mvn         v24.8b, v24.8b
-            mvn         v25.8b, v25.8b
-                shrn        v16.8b, v4.8h,  #2
-            mvn         v26.8b, v26.8b
-            umull       v5.8h, v16.8b, v24.8b
-            umull       v6.8h, v17.8b, v25.8b
-            umull       v7.8h, v18.8b, v26.8b
-    st1         {v14.8h}, [DST_W], #16
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_init
-    mov         v13.s[0], w4
-    dup         v8.8b, v13.b[0]
-    dup         v9.8b, v13.b[1]
-    dup         v10.8b, v13.b[2]
-    dup         v11.8b, v13.b[3]
-.endm
-
-.macro pixman_composite_over_n_8888_0565_ca_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_n_8888_0565_ca_init, \
-    pixman_composite_over_n_8888_0565_ca_cleanup, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \
-    pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_in_n_8_process_pixblock_head
-    /* expecting source data in {v0, v1, v2, v3} */
-    /* and destination data in {v4, v5, v6, v7} */
-    umull       v8.8h,  v4.8b,  v3.8b
-    umull       v9.8h,  v5.8b,  v3.8b
-    umull       v10.8h, v6.8b,  v3.8b
-    umull       v11.8h, v7.8b,  v3.8b
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail
-    urshr       v14.8h, v8.8h,  #8
-    urshr       v15.8h, v9.8h,  #8
-    urshr       v12.8h, v10.8h, #8
-    urshr       v13.8h, v11.8h, #8
-    raddhn      v28.8b, v8.8h,  v14.8h
-    raddhn      v29.8b, v9.8h,  v15.8h
-    raddhn      v30.8b, v10.8h, v12.8h
-    raddhn      v31.8b, v11.8h, v13.8h
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail_head
-    pixman_composite_in_n_8_process_pixblock_tail
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    cache_preload 32, 32
-    pixman_composite_in_n_8_process_pixblock_head
-    st1         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-.macro pixman_composite_in_n_8_init
-    mov         v3.s[0], w4
-    dup         v3.8b, v3.b[3]
-.endm
-
-.macro pixman_composite_in_n_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_in_n_8_init, \
-    pixman_composite_in_n_8_cleanup, \
-    pixman_composite_in_n_8_process_pixblock_head, \
-    pixman_composite_in_n_8_process_pixblock_tail, \
-    pixman_composite_in_n_8_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-.macro pixman_composite_add_n_8_8_process_pixblock_head
-    /* expecting source data in {v8, v9, v10, v11} */
-    /* v8 - blue, v9 - green, v10 - red, v11 - alpha */
-    /* and destination data in {v4, v5, v6, v7} */
-    /* mask is in v24, v25, v26, v27 */
-    umull       v0.8h, v24.8b, v11.8b
-    umull       v1.8h, v25.8b, v11.8b
-    umull       v2.8h, v26.8b, v11.8b
-    umull       v3.8h, v27.8b, v11.8b
-    urshr       v12.8h, v0.8h, #8
-    urshr       v13.8h, v1.8h, #8
-    urshr       v14.8h, v2.8h, #8
-    urshr       v15.8h, v3.8h, #8
-    raddhn      v0.8b, v0.8h, v12.8h
-    raddhn      v1.8b, v1.8h, v13.8h
-    raddhn      v2.8b, v2.8h, v14.8h
-    raddhn      v3.8b, v3.8h, v15.8h
-    uqadd       v28.8b, v0.8b, v4.8b
-    uqadd       v29.8b, v1.8b, v5.8b
-    uqadd       v30.8b, v2.8b, v6.8b
-    uqadd       v31.8b, v3.8b, v7.8b
-.endm
-
-.macro pixman_composite_add_n_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
-    pixman_composite_add_n_8_8_process_pixblock_tail
-    st1         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    fetch_mask_pixblock
-    cache_preload 32, 32
-    pixman_composite_add_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_n_8_8_init
-    mov         v11.s[0], w4
-    dup         v11.8b, v11.b[3]
-.endm
-
-.macro pixman_composite_add_n_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_n_8_8_init, \
-    pixman_composite_add_n_8_8_cleanup, \
-    pixman_composite_add_n_8_8_process_pixblock_head, \
-    pixman_composite_add_n_8_8_process_pixblock_tail, \
-    pixman_composite_add_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_8_process_pixblock_head
-    /* expecting source data in {v0, v1, v2, v3} */
-    /* destination data in {v4, v5, v6, v7} */
-    /* mask in {v24, v25, v26, v27} */
-    umull       v8.8h, v24.8b, v0.8b
-    umull       v9.8h, v25.8b, v1.8b
-    umull       v10.8h, v26.8b, v2.8b
-    umull       v11.8h, v27.8b, v3.8b
-    urshr       v0.8h, v8.8h, #8
-    urshr       v1.8h, v9.8h, #8
-    urshr       v12.8h, v10.8h, #8
-    urshr       v13.8h, v11.8h, #8
-    raddhn      v0.8b, v0.8h, v8.8h
-    raddhn      v1.8b, v1.8h, v9.8h
-    raddhn      v2.8b, v12.8h, v10.8h
-    raddhn      v3.8b, v13.8h, v11.8h
-    uqadd       v28.8b, v0.8b, v4.8b
-    uqadd       v29.8b, v1.8b, v5.8b
-    uqadd       v30.8b, v2.8b, v6.8b
-    uqadd       v31.8b, v3.8b, v7.8b
-.endm
-
-.macro pixman_composite_add_8_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
-    pixman_composite_add_8_8_8_process_pixblock_tail
-    st1         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    ld1         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    fetch_mask_pixblock
-    fetch_src_pixblock
-    cache_preload 32, 32
-    pixman_composite_add_8_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_8_8_8_init
-.endm
-
-.macro pixman_composite_add_8_8_8_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
-    FLAG_DST_READWRITE, \
-    32, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_8_8_8_init, \
-    pixman_composite_add_8_8_8_cleanup, \
-    pixman_composite_add_8_8_8_process_pixblock_head, \
-    pixman_composite_add_8_8_8_process_pixblock_tail, \
-    pixman_composite_add_8_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
-    /* expecting source data in {v0, v1, v2, v3} */
-    /* destination data in {v4, v5, v6, v7} */
-    /* mask in {v24, v25, v26, v27} */
-    umull       v8.8h,  v27.8b, v0.8b
-    umull       v9.8h,  v27.8b, v1.8b
-    umull       v10.8h, v27.8b, v2.8b
-    umull       v11.8h, v27.8b, v3.8b
-    /* 1 cycle bubble */
-    ursra       v8.8h,  v8.8h,  #8
-    ursra       v9.8h,  v9.8h,  #8
-    ursra       v10.8h, v10.8h, #8
-    ursra       v11.8h, v11.8h, #8
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
-    /* 2 cycle bubble */
-    rshrn       v28.8b, v8.8h,  #8
-    rshrn       v29.8b, v9.8h,  #8
-    rshrn       v30.8b, v10.8h, #8
-    rshrn       v31.8b, v11.8h, #8
-    uqadd       v28.8b, v4.8b,  v28.8b
-    uqadd       v29.8b, v5.8b,  v29.8b
-    uqadd       v30.8b, v6.8b,  v30.8b
-    uqadd       v31.8b, v7.8b,  v31.8b
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-        rshrn       v28.8b, v8.8h,  #8
-    fetch_mask_pixblock
-        rshrn       v29.8b, v9.8h,  #8
-    umull       v8.8h,  v27.8b, v0.8b
-        rshrn       v30.8b, v10.8h, #8
-    umull       v9.8h,  v27.8b, v1.8b
-        rshrn       v31.8b, v11.8h, #8
-    umull       v10.8h, v27.8b, v2.8b
-    umull       v11.8h, v27.8b, v3.8b
-        uqadd       v28.8b, v4.8b,  v28.8b
-        uqadd       v29.8b, v5.8b,  v29.8b
-        uqadd       v30.8b, v6.8b,  v30.8b
-        uqadd       v31.8b, v7.8b,  v31.8b
-    ursra       v8.8h,  v8.8h,  #8
-    ld4         {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    ursra       v9.8h,  v9.8h,  #8
-        st4         {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-    ursra       v10.8h, v10.8h, #8
-
-    cache_preload 8, 8
-
-    ursra       v11.8h, v11.8h, #8
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-/******************************************************************************/
-
-generate_composite_function \
-    pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_n_8_8888_init
-    mov         v3.s[0], w4
-    dup         v0.8b, v3.b[0]
-    dup         v1.8b, v3.b[1]
-    dup         v2.8b, v3.b[2]
-    dup         v3.8b, v3.b[3]
-.endm
-
-.macro pixman_composite_add_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_n_8_8888_init, \
-    pixman_composite_add_n_8_8888_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_n_8888_init
-    mov         v27.s[0], w6
-    dup         v27.8b, v27.b[3]
-.endm
-
-.macro pixman_composite_add_8888_n_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_add_8888_n_8888_init, \
-    pixman_composite_add_8888_n_8888_cleanup, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
-    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    27  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-    /* expecting source data in {v0, v1, v2, v3} */
-    /* destination data in {v4, v5, v6, v7} */
-    /* solid mask is in v15 */
-
-    /* 'in' */
-    umull       v11.8h, v15.8b, v3.8b
-    umull       v10.8h, v15.8b, v2.8b
-    umull       v9.8h,  v15.8b, v1.8b
-    umull       v8.8h,  v15.8b, v0.8b
-    urshr       v16.8h, v11.8h, #8
-    urshr       v14.8h, v10.8h, #8
-    urshr       v13.8h,  v9.8h, #8
-    urshr       v12.8h,  v8.8h, #8
-    raddhn      v3.8b, v11.8h, v16.8h
-    raddhn      v2.8b, v10.8h, v14.8h
-    raddhn      v1.8b,  v9.8h, v13.8h
-    raddhn      v0.8b,  v8.8h, v12.8h
-    mvn         v24.8b, v3.8b  /* get inverted alpha */
-    /* now do alpha blending */
-    umull       v8.8h, v24.8b, v4.8b
-    umull       v9.8h, v24.8b, v5.8b
-    umull       v10.8h, v24.8b, v6.8b
-    umull       v11.8h, v24.8b, v7.8b
-.endm
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    urshr       v16.8h, v8.8h, #8
-    urshr       v17.8h, v9.8h, #8
-    urshr       v18.8h, v10.8h, #8
-    urshr       v19.8h, v11.8h, #8
-    raddhn      v28.8b, v16.8h, v8.8h
-    raddhn      v29.8b, v17.8h, v9.8h
-    raddhn      v30.8b, v18.8h, v10.8h
-    raddhn      v31.8b, v19.8h, v11.8h
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
-    ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-    st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_head
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
-    pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
-    uqadd       v28.8b, v0.8b, v28.8b
-    uqadd       v29.8b, v1.8b, v29.8b
-    uqadd       v30.8b, v2.8b, v30.8b
-    uqadd       v31.8b, v3.8b, v31.8b
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
-    ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-.macro pixman_composite_over_8888_n_8888_init
-    mov         v15.s[0], w6
-    dup         v15.8b, v15.b[3]
-.endm
-
-.macro pixman_composite_over_8888_n_8888_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_8888_n_8888_init, \
-    pixman_composite_over_8888_n_8888_cleanup, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
-    ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-generate_composite_function_single_scanline \
-    pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    12  /* mask_basereg  */
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
-    ld4        {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    pixman_composite_over_8888_n_8888_process_pixblock_tail
-    fetch_src_pixblock
-    cache_preload 8, 8
-    fetch_mask_pixblock
-    pixman_composite_over_8888_n_8888_process_pixblock_head
-    st4        {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-generate_composite_function \
-    pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_n_8888_process_pixblock_head, \
-    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
-    st3     {v0.8b, v1.8b, v2.8b}, [DST_W], #24
-    fetch_src_pixblock
-    cache_preload 8, 8
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_0888_process_pixblock_head, \
-    pixman_composite_src_0888_0888_process_pixblock_tail, \
-    pixman_composite_src_0888_0888_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
-    mov    v31.8b, v2.8b
-    mov    v2.8b, v0.8b
-    mov    v0.8b, v31.8b
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
-    st4    {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_W], #32
-    fetch_src_pixblock
-    mov    v31.8b, v2.8b
-    mov    v2.8b, v0.8b
-    mov    v0.8b, v31.8b
-    cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_init
-    eor    v3.8b, v3.8b, v3.8b
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    pixman_composite_src_0888_8888_rev_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_head, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
-    pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
-    0, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
-    ushll       v8.8h, v1.8b, #7
-    sli         v8.8h, v8.8h, #1
-    ushll       v9.8h, v2.8b, #7
-    sli         v9.8h, v9.8h, #1
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
-    ushll       v14.8h, v0.8b, #7
-    sli         v14.8h, v14.8h, #1
-    sri         v14.8h, v8.8h, #5
-    sri         v14.8h, v9.8h, #11
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
-        ushll       v14.8h, v0.8b, #7
-        sli         v14.8h, v14.8h, #1
-    fetch_src_pixblock
-        sri         v14.8h, v8.8h, #5
-        sri         v14.8h, v9.8h, #11
-        mov         v28.d[0], v14.d[0]
-        mov         v29.d[0], v14.d[1]
-    ushll       v8.8h, v1.8b, #7
-    sli         v8.8h, v8.8h, #1
-        st1     {v14.8h}, [DST_W], #16
-    ushll       v9.8h, v2.8b, #7
-    sli         v9.8h, v9.8h, #1
-.endm
-
-generate_composite_function \
-    pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
-    FLAG_DST_WRITEONLY, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_head, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
-    pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
-    umull       v8.8h, v3.8b, v0.8b
-    umull       v9.8h, v3.8b, v1.8b
-    umull       v10.8h, v3.8b, v2.8b
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
-    urshr       v11.8h, v8.8h, #8
-    mov         v30.8b, v31.8b
-    mov         v31.8b, v3.8b
-    mov         v3.8b, v30.8b
-    urshr       v12.8h, v9.8h, #8
-    urshr       v13.8h, v10.8h, #8
-    raddhn      v30.8b, v11.8h, v8.8h
-    raddhn      v29.8b, v12.8h, v9.8h
-    raddhn      v28.8b, v13.8h, v10.8h
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
-        urshr       v11.8h, v8.8h, #8
-        mov         v30.8b, v31.8b
-        mov         v31.8b, v3.8b
-        mov         v3.8b, v31.8b
-        urshr       v12.8h, v9.8h, #8
-        urshr       v13.8h, v10.8h, #8
-    fetch_src_pixblock
-        raddhn      v30.8b, v11.8h, v8.8h
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v29.8b, v12.8h, v9.8h
-        raddhn      v28.8b, v13.8h, v10.8h
-    umull       v8.8h, v3.8b, v0.8b
-    umull       v9.8h, v3.8b, v1.8b
-    umull       v10.8h, v3.8b, v2.8b
-         st4    {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF cmp PF_X, ORIG_W
-                                    PF lsl DUMMY, PF_X, src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-                                    PF subs PF_CTL, PF_CTL, #0x10
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-.endm
-
-generate_composite_function \
-    pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_head, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
-    pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head
-    umull       v8.8h, v3.8b, v0.8b
-    umull       v9.8h, v3.8b, v1.8b
-    umull       v10.8h, v3.8b, v2.8b
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail
-    urshr       v11.8h, v8.8h, #8
-    mov         v30.8b, v31.8b
-    mov         v31.8b, v3.8b
-    mov         v3.8b, v30.8b
-    urshr       v12.8h, v9.8h, #8
-    urshr       v13.8h, v10.8h, #8
-    raddhn      v28.8b, v11.8h, v8.8h
-    raddhn      v29.8b, v12.8h, v9.8h
-    raddhn      v30.8b, v13.8h, v10.8h
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head
-        urshr       v11.8h, v8.8h, #8
-        mov         v30.8b, v31.8b
-        mov         v31.8b, v3.8b
-        mov         v3.8b, v30.8b
-        urshr       v12.8h, v9.8h, #8
-        urshr       v13.8h, v10.8h, #8
-    fetch_src_pixblock
-        raddhn      v28.8b, v11.8h, v8.8h
-                                    PF add PF_X, PF_X, #8
-                                    PF tst PF_CTL, #0xF
-                                    PF beq 10f
-                                    PF add PF_X, PF_X, #8
-                                    PF sub PF_CTL, PF_CTL, #1
-10:
-        raddhn      v29.8b, v12.8h, v9.8h
-        raddhn      v30.8b, v13.8h, v10.8h
-    umull       v8.8h, v3.8b, v0.8b
-    umull       v9.8h, v3.8b, v1.8b
-    umull       v10.8h, v3.8b, v2.8b
-         st4    {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-                                    PF cmp PF_X, ORIG_W
-                                    PF lsl DUMMY, PF_X, src_bpp_shift
-                                    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-                                    PF ble 10f
-                                    PF sub PF_X, PF_X, ORIG_W
-                                    PF subs PF_CTL, PF_CTL, #0x10
-                                    PF ble 10f
-                                    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-                                    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-                                    PF add PF_SRC, PF_SRC, #1
-10:
-.endm
-
-generate_composite_function \
-    pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    10, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
-    pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    0, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_head
-    /* mask is in v15 */
-    mov         v4.d[0], v8.d[0]
-    mov         v4.d[1], v9.d[0]
-    mov         v13.d[0], v10.d[0]
-    mov         v13.d[1], v11.d[0]
-    convert_0565_to_x888 v4, v2, v1, v0
-    convert_0565_to_x888 v13, v6, v5, v4
-    /* source pixel data is in      {v0, v1, v2, XX} */
-    /* destination pixel data is in {v4, v5, v6, XX} */
-    mvn         v7.8b,  v15.8b
-    umull       v10.8h, v15.8b, v2.8b
-    umull       v9.8h,  v15.8b, v1.8b
-    umull       v8.8h,  v15.8b, v0.8b
-    umull       v11.8h, v7.8b,  v4.8b
-    umull       v12.8h, v7.8b,  v5.8b
-    umull       v13.8h, v7.8b,  v6.8b
-    urshr       v19.8h, v10.8h, #8
-    urshr       v18.8h, v9.8h,  #8
-    urshr       v17.8h, v8.8h,  #8
-    raddhn      v2.8b,  v10.8h, v19.8h
-    raddhn      v1.8b,  v9.8h,  v18.8h
-    raddhn      v0.8b,  v8.8h,  v17.8h
-.endm
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
-    urshr       v17.8h, v11.8h,  #8
-    urshr       v18.8h, v12.8h,  #8
-    urshr       v19.8h, v13.8h,  #8
-    raddhn      v28.8b, v17.8h, v11.8h
-    raddhn      v29.8b, v18.8h, v12.8h
-    raddhn      v30.8b, v19.8h, v13.8h
-    uqadd       v0.8b,  v0.8b,  v28.8b
-    uqadd       v1.8b,  v1.8b,  v29.8b
-    uqadd       v2.8b,  v2.8b,  v30.8b
-    /* 32bpp result is in {v0, v1, v2, XX} */
-    convert_8888_to_0565 v2, v1, v0, v14, v30, v13
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
-    fetch_mask_pixblock
-    pixman_composite_over_0565_8_0565_process_pixblock_tail
-    fetch_src_pixblock
-    ld1        {v10.4h, v11.4h}, [DST_R], #16
-    cache_preload 8, 8
-    pixman_composite_over_0565_8_0565_process_pixblock_head
-    st1        {v14.8h}, [DST_W], #16
-.endm
-
-generate_composite_function \
-    pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_n_0565_init
-    mov         v15.s[0], w6
-    dup         v15.8b, v15.b[3]
-.endm
-
-.macro pixman_composite_over_0565_n_0565_cleanup
-.endm
-
-generate_composite_function \
-    pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    pixman_composite_over_0565_n_0565_init, \
-    pixman_composite_over_0565_n_0565_cleanup, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_head
-    /* mask is in v15 */
-    mov         v4.d[0], v8.d[0]
-    mov         v4.d[1], v9.d[0]
-    mov         v13.d[0], v10.d[0]
-    mov         v13.d[1], v11.d[0]
-    convert_0565_to_x888 v4,  v2, v1, v0
-    convert_0565_to_x888 v13, v6, v5, v4
-    /* source pixel data is in      {v0, v1, v2, XX} */
-    /* destination pixel data is in {v4, v5, v6, XX} */
-    umull       v9.8h,  v15.8b, v2.8b
-    umull       v8.8h,  v15.8b, v1.8b
-    umull       v7.8h,  v15.8b, v0.8b
-    urshr       v12.8h, v9.8h,  #8
-    urshr       v11.8h, v8.8h,  #8
-    urshr       v10.8h, v7.8h,  #8
-    raddhn      v2.8b,  v9.8h,  v12.8h
-    raddhn      v1.8b,  v8.8h,  v11.8h
-    raddhn      v0.8b,  v7.8h,  v10.8h
-.endm
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
-    uqadd       v0.8b,  v0.8b,  v4.8b
-    uqadd       v1.8b,  v1.8b,  v5.8b
-    uqadd       v2.8b,  v2.8b,  v6.8b
-    /* 32bpp result is in {v0, v1, v2, XX} */
-    convert_8888_to_0565 v2, v1, v0, v14, v30, v13
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
-    fetch_mask_pixblock
-    pixman_composite_add_0565_8_0565_process_pixblock_tail
-    fetch_src_pixblock
-    ld1        {v10.4h, v11.4h}, [DST_R], #16
-    cache_preload 8, 8
-    pixman_composite_add_0565_8_0565_process_pixblock_head
-    st1        {v14.8h}, [DST_W], #16
-.endm
-
-generate_composite_function \
-    pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_add_0565_8_0565_process_pixblock_head, \
-    pixman_composite_add_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10, /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
-    /* mask is in v15 */
-    mov         v12.d[0], v10.d[0]
-    mov         v12.d[1], v11.d[0]
-    convert_0565_to_x888 v12, v6, v5, v4
-    /* destination pixel data is in {v4, v5, v6, xx} */
-    mvn         v24.8b, v15.8b /* get inverted alpha */
-    /* now do alpha blending */
-    umull       v8.8h,  v24.8b, v4.8b
-    umull       v9.8h,  v24.8b, v5.8b
-    umull       v10.8h, v24.8b, v6.8b
-.endm
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
-    urshr       v11.8h, v8.8h, #8
-    urshr       v12.8h, v9.8h, #8
-    urshr       v13.8h, v10.8h, #8
-    raddhn      v0.8b, v11.8h, v8.8h
-    raddhn      v1.8b, v12.8h, v9.8h
-    raddhn      v2.8b, v13.8h, v10.8h
-    /* 32bpp result is in {v0, v1, v2, XX} */
-    convert_8888_to_0565 v2, v1, v0, v14, v12, v3
-    mov         v28.d[0], v14.d[0]
-    mov         v29.d[0], v14.d[1]
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
-    fetch_src_pixblock
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail
-    ld1        {v10.4h, v11.4h}, [DST_R], #16
-    cache_preload 8, 8
-    pixman_composite_out_reverse_8_0565_process_pixblock_head
-    st1        {v14.8h}, [DST_W], #16
-.endm
-
-generate_composite_function \
-    pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_head, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
-    pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    15, /* src_basereg   */ \
-    0   /* mask_basereg  */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_head
-    /* src is in v0 */
-    /* destination pixel data is in {v4, v5, v6, v7} */
-    mvn         v1.8b, v0.8b /* get inverted alpha */
-    /* now do alpha blending */
-    umull       v8.8h, v1.8b, v4.8b
-    umull       v9.8h, v1.8b, v5.8b
-    umull       v10.8h, v1.8b, v6.8b
-    umull       v11.8h, v1.8b, v7.8b
-.endm
-
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail
-    urshr       v14.8h, v8.8h, #8
-    urshr       v15.8h, v9.8h, #8
-    urshr       v12.8h, v10.8h, #8
-    urshr       v13.8h, v11.8h, #8
-    raddhn      v28.8b, v14.8h, v8.8h
-    raddhn      v29.8b, v15.8h, v9.8h
-    raddhn      v30.8b, v12.8h, v10.8h
-    raddhn      v31.8b, v13.8h, v11.8h
-    /* 32bpp result is in {v28, v29, v30, v31} */
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head
-    fetch_src_pixblock
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail
-    ld4       {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
-    cache_preload 8, 8
-    pixman_composite_out_reverse_8_8888_process_pixblock_head
-    st4       {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
-.endm
-
-generate_composite_function \
-    pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_head, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail, \
-    pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4, /* dst_r_basereg */ \
-    0, /* src_basereg   */ \
-    0   /* mask_basereg  */
-
-/******************************************************************************/
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_8888_process_pixblock_head, \
-    pixman_composite_over_8888_8888_process_pixblock_tail, \
-    pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_over_8888_0565_process_pixblock_head, \
-    pixman_composite_over_8888_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    0,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_8888_0565_process_pixblock_head, \
-    pixman_composite_src_8888_0565_process_pixblock_tail, \
-    pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \
-    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init, \
-    default_cleanup, \
-    pixman_composite_src_0565_8888_process_pixblock_head, \
-    pixman_composite_src_0565_8888_process_pixblock_tail, \
-    pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_8888_8_0565_process_pixblock_head, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
-    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    4,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    24  /* mask_basereg  */
-
-generate_composite_function_nearest_scanline \
-    pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \
-    FLAG_DST_READWRITE, \
-    8, /* number of pixels, processed in a single block */ \
-    default_init_need_all_regs, \
-    default_cleanup_need_all_regs, \
-    pixman_composite_over_0565_8_0565_process_pixblock_head, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
-    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
-    28, /* dst_w_basereg */ \
-    10,  /* dst_r_basereg */ \
-    8,  /* src_basereg   */ \
-    15  /* mask_basereg  */
-
-/******************************************************************************/
-
-/*
- * Bilinear scaling support code which tries to provide pixel fetching, color
- * format conversion, and interpolation as separate macros which can be used
- * as the basic building blocks for constructing bilinear scanline functions.
- */
-
-.macro bilinear_load_8888 reg1, reg2, tmp
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #2
-    ld1       {&reg1&.2s}, [TMP1], STRIDE
-    ld1       {&reg2&.2s}, [TMP1]
-.endm
-
-.macro bilinear_load_0565 reg1, reg2, tmp
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    ld1       {&reg2&.s}[0], [TMP1], STRIDE
-    ld1       {&reg2&.s}[1], [TMP1]
-    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_8888 \
-                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
-
-    bilinear_load_8888 reg1, reg2, tmp1
-    umull     &acc1&.8h, &reg1&.8b, v28.8b
-    umlal     &acc1&.8h, &reg2&.8b, v29.8b
-    bilinear_load_8888 reg3, reg4, tmp2
-    umull     &acc2&.8h, &reg3&.8b, v28.8b
-    umlal     &acc2&.8h, &reg4&.8b, v29.8b
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
-    bilinear_load_and_vertical_interpolate_two_8888 \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-.endm
-
-.macro vzip reg1, reg2
-    umov      TMP4, v31.d[0]
-    zip1      v31.8b, reg1, reg2
-    zip2      reg2,   reg1, reg2
-    mov       reg1,   v31.8b
-    mov       v31.d[0], TMP4
-.endm
-
-.macro vuzp reg1, reg2
-    umov      TMP4, v31.d[0]
-    uzp1      v31.8b, reg1, reg2
-    uzp2      reg2,   reg1, reg2
-    mov       reg1,   v31.8b
-    mov       v31.d[0], TMP4
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_two_0565 \
-                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       TMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&acc2&.s}[0], [TMP1], STRIDE
-    ld1       {&acc2&.s}[2], [TMP2], STRIDE
-    ld1       {&acc2&.s}[1], [TMP1]
-    ld1       {&acc2&.s}[3], [TMP2]
-    convert_0565_to_x888 acc2, reg3, reg2, reg1
-    vzip      &reg1&.8b, &reg3&.8b
-    vzip      &reg2&.8b, &reg4&.8b
-    vzip      &reg3&.8b, &reg4&.8b
-    vzip      &reg1&.8b, &reg2&.8b
-    umull     &acc1&.8h, &reg1&.8b, v28.8b
-    umlal     &acc1&.8h, &reg2&.8b, v29.8b
-    umull     &acc2&.8h, &reg3&.8b, v28.8b
-    umlal     &acc2&.8h, &reg4&.8b, v29.8b
-.endm
-
-.macro bilinear_load_and_vertical_interpolate_four_0565 \
-                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
-                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       TMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&xacc2&.s}[0], [TMP1], STRIDE
-    ld1       {&xacc2&.s}[2], [TMP2], STRIDE
-    ld1       {&xacc2&.s}[1], [TMP1]
-    ld1       {&xacc2&.s}[3], [TMP2]
-    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #1
-    asr       TMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #1
-    ld1       {&yacc2&.s}[0], [TMP1], STRIDE
-    vzip      &xreg1&.8b, &xreg3&.8b
-    ld1       {&yacc2&.s}[2], [TMP2], STRIDE
-    vzip      &xreg2&.8b, &xreg4&.8b
-    ld1       {&yacc2&.s}[1], [TMP1]
-    vzip      &xreg3&.8b, &xreg4&.8b
-    ld1       {&yacc2&.s}[3], [TMP2]
-    vzip      &xreg1&.8b, &xreg2&.8b
-    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
-    umull     &xacc1&.8h, &xreg1&.8b, v28.8b
-    vzip      &yreg1&.8b, &yreg3&.8b
-    umlal     &xacc1&.8h, &xreg2&.8b, v29.8b
-    vzip      &yreg2&.8b, &yreg4&.8b
-    umull     &xacc2&.8h, &xreg3&.8b, v28.8b
-    vzip      &yreg3&.8b, &yreg4&.8b
-    umlal     &xacc2&.8h, &xreg4&.8b, v29.8b
-    vzip      &yreg1&.8b, &yreg2&.8b
-    umull     &yacc1&.8h, &yreg1&.8b, v28.8b
-    umlal     &yacc1&.8h, &yreg2&.8b, v29.8b
-    umull     &yacc2&.8h, &yreg3&.8b, v28.8b
-    umlal     &yacc2&.8h, &yreg4&.8b, v29.8b
-.endm
-
-.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
-    st1       {v0.2s, v1.2s}, [OUT], #16
-.elseif numpix == 2
-    st1       {v0.2s}, [OUT], #8
-.elseif numpix == 1
-    st1       {v0.s}[0], [OUT], #4
-.else
-    .error bilinear_store_8888 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_store_0565 numpix, tmp1, tmp2
-    vuzp      v0.8b, v1.8b
-    vuzp      v2.8b, v3.8b
-    vuzp      v1.8b, v3.8b
-    vuzp      v0.8b, v2.8b
-    convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2
-.if numpix == 4
-    st1       {v1.4h}, [OUT], #8
-.elseif numpix == 2
-    st1       {v1.s}[0], [OUT], #4
-.elseif numpix == 1
-    st1       {v1.h}[0], [OUT], #2
-.else
-    .error bilinear_store_0565 numpix is unsupported
-.endif
-.endm
-
-.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
-    bilinear_load_&src_fmt v0, v1, v2
-    umull     v2.8h, v0.8b, v28.8b
-    umlal     v2.8h, v1.8b, v29.8b
-    /* 5 cycles bubble */
-    ushll     v0.4s, v2.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v2.4h, v15.h[0]
-    umlal2    v0.4s, v2.8h, v15.h[0]
-    /* 5 cycles bubble */
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    /* 3 cycles bubble */
-    xtn       v0.8b, v0.8h
-    /* 1 cycle bubble */
-    bilinear_store_&dst_fmt 1, v3, v4
-.endm
-
-.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
-    bilinear_load_and_vertical_interpolate_two_&src_fmt \
-                v1, v11, v2, v3, v20, v21, v22, v23
-    ushll     v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v1.4h, v15.h[0]
-    umlal2    v0.4s, v1.8h, v15.h[0]
-    ushll     v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v10.4s, v11.4h, v15.h[4]
-    umlal2    v10.4s, v11.8h, v15.h[4]
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-    xtn       v0.8b, v0.8h
-    bilinear_store_&dst_fmt 2, v3, v4
-.endm
-
-.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
-    bilinear_load_and_vertical_interpolate_four_&src_fmt \
-                v1, v11, v14, v20, v16, v17, v22, v23 \
-                v3, v9,  v24, v25, v26, v27, v18, v19
-    prfm      PREFETCH_MODE, [TMP1, PF_OFFS]
-    sub       TMP1, TMP1, STRIDE
-    ushll     v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v1.4h, v15.h[0]
-    umlal2    v0.4s, v1.8h, v15.h[0]
-    ushll     v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v10.4s, v11.4h, v15.h[4]
-    umlal2    v10.4s, v11.8h, v15.h[4]
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    ushll     v2.4s, v3.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v2.4s, v3.4h, v15.h[0]
-    umlal2    v2.4s, v3.8h, v15.h[0]
-    ushll     v8.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-    prfm      PREFETCH_MODE, [TMP2, PF_OFFS]
-    umlsl     v8.4s, v9.4h, v15.h[4]
-    umlal2    v8.4s, v9.8h, v15.h[4]
-    add       v12.8h, v12.8h, v13.8h
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn      v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v2.8h, v8.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    xtn       v0.8b, v0.8h
-    xtn       v1.8b, v2.8h
-    add       v12.8h, v12.8h, v13.8h
-    bilinear_store_&dst_fmt 4, v3, v4
-.endm
-
-.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
-.else
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
-.endif
-.endm
-
-.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
-.else
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
-.else
-    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
-.else
-    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-.endif
-.endm
-
-.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
-    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
-.else
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.endif
-.endm
-
-.set BILINEAR_FLAG_UNROLL_4,          0
-.set BILINEAR_FLAG_UNROLL_8,          1
-.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2
-
-/*
- * Main template macro for generating NEON optimized bilinear scanline
- * functions.
- *
- * Bilinear scanline scaler macro template uses the following arguments:
- *  fname             - name of the function to generate
- *  src_fmt           - source color format (8888 or 0565)
- *  dst_fmt           - destination color format (8888 or 0565)
- *  bpp_shift         - (1 << bpp_shift) is the size of source pixel in bytes
- *  prefetch_distance - prefetch in the source image by that many
- *                      pixels ahead
- */
-
-.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
-                                       src_bpp_shift, dst_bpp_shift, \
-                                       prefetch_distance, flags
-
-pixman_asm_function fname
-    OUT       .req      x0
-    TOP       .req      x1
-    BOTTOM    .req      x2
-    WT        .req      x3
-    WB        .req      x4
-    X         .req      x5
-    UX        .req      x6
-    WIDTH     .req      x7
-    TMP1      .req      x8
-    TMP2      .req      x9
-    PF_OFFS   .req      x10
-    TMP3      .req      x11
-    TMP4      .req      x12
-    STRIDE    .req      x13
-
-    sxtw      x3, w3
-    sxtw      x4, w4
-    sxtw      x5, w5
-    sxtw      x6, w6
-    sxtw      x7, w7
-
-    stp       x29, x30, [sp, -16]!
-    mov       x29, sp
-    sub       sp,  sp, 112  /* push all registers */
-    sub       x29, x29, 64
-    st1       {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32
-    st1       {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32
-    stp        x8,  x9, [x29, -80]
-    stp       x10, x11, [x29, -96]
-    stp       x12, x13, [x29, -112]
-
-    mov       PF_OFFS, #prefetch_distance
-    mul       PF_OFFS, PF_OFFS, UX
-
-    subs      STRIDE, BOTTOM, TOP
-    .unreq    BOTTOM
-
-    cmp       WIDTH, #0
-    ble       300f
-
-    dup       v12.8h, w5
-    dup       v13.8h, w6
-    dup       v28.8b, w3
-    dup       v29.8b, w4
-    mov       v25.d[0], v12.d[1]
-    mov       v26.d[0], v13.d[0]
-    add       v25.4h, v25.4h, v26.4h
-    mov       v12.d[1], v25.d[0]
-
-    /* ensure good destination alignment  */
-    cmp       WIDTH, #1
-    blt       100f
-    tst       OUT, #(1 << dst_bpp_shift)
-    beq       100f
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-    bilinear_interpolate_last_pixel src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #1
-100:
-    add       v13.8h, v13.8h, v13.8h
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    add       v12.8h, v12.8h, v13.8h
-
-    cmp       WIDTH, #2
-    blt       100f
-    tst       OUT, #(1 << (dst_bpp_shift + 1))
-    beq       100f
-    bilinear_interpolate_two_pixels src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #2
-100:
-.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
-/*********** 8 pixels per iteration *****************/
-    cmp       WIDTH, #4
-    blt       100f
-    tst       OUT, #(1 << (dst_bpp_shift + 2))
-    beq       100f
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-    sub       WIDTH, WIDTH, #4
-100:
-    subs      WIDTH, WIDTH, #8
-    blt       100f
-    asr       PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
-    bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #8
-    blt       500f
-1000:
-    bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #8
-    bge       1000b
-500:
-    bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
-100:
-    tst       WIDTH, #4
-    beq       200f
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
-200:
-.else
-/*********** 4 pixels per iteration *****************/
-    subs      WIDTH, WIDTH, #4
-    blt       100f
-    asr       PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
-    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #4
-    blt       500f
-1000:
-    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-    subs      WIDTH, WIDTH, #4
-    bge       1000b
-500:
-    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-100:
-/****************************************************/
-.endif
-    /* handle the remaining trailing pixels */
-    tst       WIDTH, #2
-    beq       200f
-    bilinear_interpolate_two_pixels src_fmt, dst_fmt
-200:
-    tst       WIDTH, #1
-    beq       300f
-    bilinear_interpolate_last_pixel src_fmt, dst_fmt
-300:
-    sub       x29, x29, 64
-    ld1       {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32
-    ld1       {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32
-    ldp        x8,  x9, [x29, -80]
-    ldp       x10, x11, [x29, -96]
-    ldp       x12, x13, [x29, -104]
-    mov       sp, x29
-    ldp       x29, x30, [sp], 16
-    ret
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    TMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.endfunc
-
-.endm
-
-/*****************************************************************************/
-
-.set have_bilinear_interpolate_four_pixels_8888_8888, 1
-
-.macro bilinear_interpolate_four_pixels_8888_8888_head
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #2
-    asr       TMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #2
-
-    ld1       {v22.2s}, [TMP1], STRIDE
-    ld1       {v23.2s}, [TMP1]
-    asr       TMP3, X, #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, lsl #2
-    umull     v8.8h, v22.8b, v28.8b
-    umlal     v8.8h, v23.8b, v29.8b
-
-    ld1       {v22.2s}, [TMP2], STRIDE
-    ld1       {v23.2s}, [TMP2]
-    asr       TMP4, X, #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, lsl #2
-    umull     v9.8h, v22.8b, v28.8b
-    umlal     v9.8h, v23.8b, v29.8b
-
-    ld1       {v22.2s}, [TMP3], STRIDE
-    ld1       {v23.2s}, [TMP3]
-    umull     v10.8h, v22.8b, v28.8b
-    umlal     v10.8h, v23.8b, v29.8b
-
-    ushll     v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v0.4s, v8.4h, v15.h[0]
-    umlal2    v0.4s, v8.8h, v15.h[0]
-
-    prfm      PREFETCH_MODE, [TMP4, PF_OFFS]
-    ld1       {v16.2s}, [TMP4], STRIDE
-    ld1       {v17.2s}, [TMP4]
-    prfm      PREFETCH_MODE, [TMP4, PF_OFFS]
-    umull     v11.8h, v16.8b, v28.8b
-    umlal     v11.8h, v17.8b, v29.8b
-
-    ushll     v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v1.4s, v9.4h, v15.h[4]
-.endm
-
-.macro bilinear_interpolate_four_pixels_8888_8888_tail
-    umlal2    v1.4s, v9.8h, v15.h[4]
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    ushll     v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v2.4s, v10.4h, v15.h[0]
-    umlal2    v2.4s, v10.8h, v15.h[0]
-    ushll     v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v3.4s, v11.4h, v15.h[4]
-    umlal2    v3.4s, v11.8h, v15.h[4]
-    add       v12.8h, v12.8h, v13.8h
-    shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn2     v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    shrn      v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    shrn2     v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    xtn       v6.8b, v0.8h
-    xtn       v7.8b, v2.8h
-    add       v12.8h, v12.8h, v13.8h
-    st1       {v6.2s, v7.2s}, [OUT], #16
-.endm
-
-.macro bilinear_interpolate_four_pixels_8888_8888_tail_head
-    asr       TMP1, X, #16
-    add       X, X, UX
-    add       TMP1, TOP, TMP1, lsl #2
-    asr       TMP2, X, #16
-    add       X, X, UX
-    add       TMP2, TOP, TMP2, lsl #2
-        umlal2    v1.4s, v9.8h, v15.h[4]
-        ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-        ushll     v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS
-        umlsl     v2.4s, v10.4h, v15.h[0]
-        umlal2    v2.4s, v10.8h, v15.h[0]
-        ushll     v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS
-    ld1       {v20.2s}, [TMP1], STRIDE
-        umlsl     v3.4s, v11.4h, v15.h[4]
-        umlal2    v3.4s, v11.8h, v15.h[4]
-    ld1       {v21.2s}, [TMP1]
-    umull     v8.8h, v20.8b, v28.8b
-    umlal     v8.8h, v21.8b, v29.8b
-        shrn      v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-        shrn2     v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-        shrn      v4.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-    ld1       {v22.2s}, [TMP2], STRIDE
-        shrn2     v4.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
-        add       v12.8h, v12.8h, v13.8h
-    ld1       {v23.2s}, [TMP2]
-    umull     v9.8h, v22.8b, v28.8b
-    asr       TMP3, X, #16
-    add       X, X, UX
-    add       TMP3, TOP, TMP3, lsl #2
-    asr       TMP4, X, #16
-    add       X, X, UX
-    add       TMP4, TOP, TMP4, lsl #2
-    umlal     v9.8h, v23.8b, v29.8b
-    ld1       {v22.2s}, [TMP3], STRIDE
-        ushr      v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
-    ld1       {v23.2s}, [TMP3]
-    umull     v10.8h, v22.8b, v28.8b
-    umlal     v10.8h, v23.8b, v29.8b
-        xtn       v6.8b, v0.8h
-    ushll     v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS
-        xtn       v7.8b, v4.8h
-    umlsl     v0.4s, v8.4h, v15.h[0]
-    umlal2    v0.4s, v8.8h, v15.h[0]
-    prfm      PREFETCH_MODE, [TMP4, PF_OFFS]
-    ld1       {v16.2s}, [TMP4], STRIDE
-        add       v12.8h, v12.8h, v13.8h
-    ld1       {v17.2s}, [TMP4]
-    prfm      PREFETCH_MODE, [TMP4, PF_OFFS]
-    umull     v11.8h, v16.8b, v28.8b
-    umlal     v11.8h, v17.8b, v29.8b
-        st1       {v6.2s, v7.2s}, [OUT], #16
-    ushll     v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS
-    umlsl     v1.4s, v9.4h, v15.h[4]
-.endm
-
-/*****************************************************************************/
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
-    2, 2, 28, BILINEAR_FLAG_UNROLL_4
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
-    2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
-    1, 2, 28, BILINEAR_FLAG_UNROLL_4
-
-generate_bilinear_scanline_func \
-    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \
-    1, 1, 28, BILINEAR_FLAG_UNROLL_4
diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm.h b/vendor/pixman/pixman/pixman-arma64-neon-asm.h
deleted file mode 100644
index 5d9317217..000000000
--- a/vendor/pixman/pixman/pixman-arma64-neon-asm.h
+++ /dev/null
@@ -1,1310 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains a macro ('generate_composite_function') which can
- * construct 2D image processing functions, based on a common template.
- * Any combinations of source, destination and mask images with 8bpp,
- * 16bpp, 24bpp, 32bpp color formats are supported.
- *
- * This macro takes care of:
- *  - handling of leading and trailing unaligned pixels
- *  - doing most of the work related to L2 cache preload
- *  - encourages the use of software pipelining for better instructions
- *    scheduling
- *
- * The user of this macro has to provide some configuration parameters
- * (bit depths for the images, prefetch distance, etc.) and a set of
- * macros, which should implement basic code chunks responsible for
- * pixels processing. See 'pixman-armv8-neon-asm.S' file for the usage
- * examples.
- *
- * TODO:
- *  - try overlapped pixel method (from Ian Rickards) when processing
- *    exactly two blocks of pixels
- *  - maybe add an option to do reverse scanline processing
- */
-
-/*
- * Bit flags for 'generate_composite_function' macro which are used
- * to tune generated functions behavior.
- */
-.set FLAG_DST_WRITEONLY,       0
-.set FLAG_DST_READWRITE,       1
-.set FLAG_DEINTERLEAVE_32BPP,  2
-
-/*
- * Constants for selecting preferable prefetch type.
- */
-.set PREFETCH_TYPE_NONE,       0 /* No prefetch at all */
-.set PREFETCH_TYPE_SIMPLE,     1 /* A simple, fixed-distance-ahead prefetch */
-.set PREFETCH_TYPE_ADVANCED,   2 /* Advanced fine-grained prefetch */
-
-/*
- * prefetch mode
- * available modes are:
- * pldl1keep
- * pldl1strm
- * pldl2keep
- * pldl2strm
- * pldl3keep
- * pldl3strm
- */
-#define PREFETCH_MODE pldl1keep
-
-/*
- * Definitions of supplementary pixld/pixst macros (for partial load/store of
- * pixel data).
- */
-
-.macro pixldst1 op, elem_size, reg1, mem_operand, abits
-    op {v&reg1&.&elem_size}, [&mem_operand&], #8
-.endm
-
-.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
-    op {v&reg1&.&elem_size, v&reg2&.&elem_size}, [&mem_operand&], #16
-.endm
-
-.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
-    op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size, v&reg4&.&elem_size}, [&mem_operand&], #32
-.endm
-
-.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits, bytes
-    op {v&reg1&.&elem_size}[idx], [&mem_operand&], #&bytes&
-.endm
-
-.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
-    op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size}, [&mem_operand&], #24
-.endm
-
-.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
-    op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size}[idx], [&mem_operand&], #3
-.endm
-
-.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
-.if numbytes == 32
-    .if elem_size==32
-        pixldst4 op, 2s, %(basereg+4), %(basereg+5), \
-                              %(basereg+6), %(basereg+7), mem_operand, abits
-    .elseif elem_size==16
-        pixldst4 op, 4h, %(basereg+4), %(basereg+5), \
-                              %(basereg+6), %(basereg+7), mem_operand, abits
-    .else
-        pixldst4 op, 8b, %(basereg+4), %(basereg+5), \
-                              %(basereg+6), %(basereg+7), mem_operand, abits
-    .endif
-.elseif numbytes == 16
-    .if elem_size==32
-          pixldst2 op, 2s, %(basereg+2), %(basereg+3), mem_operand, abits
-    .elseif elem_size==16
-          pixldst2 op, 4h, %(basereg+2), %(basereg+3), mem_operand, abits
-    .else
-          pixldst2 op, 8b, %(basereg+2), %(basereg+3), mem_operand, abits
-    .endif
-.elseif numbytes == 8
-    .if elem_size==32
-        pixldst1 op, 2s, %(basereg+1), mem_operand, abits
-    .elseif elem_size==16
-        pixldst1 op, 4h, %(basereg+1), mem_operand, abits
-    .else
-        pixldst1 op, 8b, %(basereg+1), mem_operand, abits
-    .endif
-.elseif numbytes == 4
-    .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
-        pixldst0 op, s, %(basereg+0), 1, mem_operand, abits, 4
-    .elseif elem_size == 16
-        pixldst0 op, h, %(basereg+0), 2, mem_operand, abits, 2
-        pixldst0 op, h, %(basereg+0), 3, mem_operand, abits, 2
-    .else
-        pixldst0 op, b, %(basereg+0), 4, mem_operand, abits, 1
-        pixldst0 op, b, %(basereg+0), 5, mem_operand, abits, 1
-        pixldst0 op, b, %(basereg+0), 6, mem_operand, abits, 1
-        pixldst0 op, b, %(basereg+0), 7, mem_operand, abits, 1
-    .endif
-.elseif numbytes == 2
-    .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
-        pixldst0 op, h, %(basereg+0), 1, mem_operand, abits, 2
-    .else
-        pixldst0 op, b, %(basereg+0), 2, mem_operand, abits, 1
-        pixldst0 op, b, %(basereg+0), 3, mem_operand, abits, 1
-    .endif
-.elseif numbytes == 1
-        pixldst0 op, b, %(basereg+0), 1, mem_operand, abits, 1
-.else
-    .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    pixldst4 ld4, 8b, %(basereg+4), %(basereg+5), \
-                      %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
-    pixldst3 ld3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
-    pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.else
-    pixldst %(numpix * bpp / 8), ld1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixst numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    pixldst4 st4, 8b, %(basereg+4), %(basereg+5), \
-                      %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
-    pixldst3 st3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
-    pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.elseif numpix * bpp == 32 && abits == 32
-    pixldst 4, st1, 32, basereg, mem_operand, abits
-.elseif numpix * bpp == 16 && abits == 16
-    pixldst 2, st1, 16, basereg, mem_operand, abits
-.else
-    pixldst %(numpix * bpp / 8), st1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixld_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
-    pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
-    pixld numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-.macro pixst_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
-    pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
-    pixst numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-/*
- * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register
- * aliases to be defined)
- */
-.macro pixld1_s elem_size, reg1, mem_operand
-.if elem_size == 16
-    asr     TMP1, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP1, mem_operand, TMP1, lsl #1
-    asr     TMP2, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP2, mem_operand, TMP2, lsl #1
-    ld1     {v&reg1&.h}[0], [TMP1]
-    asr     TMP1, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP1, mem_operand, TMP1, lsl #1
-    ld1     {v&reg1&.h}[1], [TMP2]
-    asr     TMP2, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP2, mem_operand, TMP2, lsl #1
-    ld1     {v&reg1&.h}[2], [TMP1]
-    ld1     {v&reg1&.h}[3], [TMP2]
-.elseif elem_size == 32
-    asr     TMP1, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP1, mem_operand, TMP1, lsl #2
-    asr     TMP2, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP2, mem_operand, TMP2, lsl #2
-    ld1     {v&reg1&.s}[0], [TMP1]
-    ld1     {v&reg1&.s}[1], [TMP2]
-.else
-    .error "unsupported"
-.endif
-.endm
-
-.macro pixld2_s elem_size, reg1, reg2, mem_operand
-.if 0 /* elem_size == 32 */
-    mov     TMP1, VX, asr #16
-    add     VX, VX, UNIT_X, asl #1
-    add     TMP1, mem_operand, TMP1, asl #2
-    mov     TMP2, VX, asr #16
-    sub     VX, VX, UNIT_X
-    add     TMP2, mem_operand, TMP2, asl #2
-    ld1     {v&reg1&.s}[0], [TMP1]
-    mov     TMP1, VX, asr #16
-    add     VX, VX, UNIT_X, asl #1
-    add     TMP1, mem_operand, TMP1, asl #2
-    ld1     {v&reg2&.s}[0], [TMP2, :32]
-    mov     TMP2, VX, asr #16
-    add     VX, VX, UNIT_X
-    add     TMP2, mem_operand, TMP2, asl #2
-    ld1     {v&reg1&.s}[1], [TMP1]
-    ld1     {v&reg2&.s}[1], [TMP2]
-.else
-    pixld1_s elem_size, reg1, mem_operand
-    pixld1_s elem_size, reg2, mem_operand
-.endif
-.endm
-
-.macro pixld0_s elem_size, reg1, idx, mem_operand
-.if elem_size == 16
-    asr     TMP1, VX, #16
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP1, mem_operand, TMP1, lsl #1
-    ld1     {v&reg1&.h}[idx], [TMP1]
-.elseif elem_size == 32
-    asr     DUMMY, VX, #16
-    mov     TMP1, DUMMY
-    adds    VX, VX, UNIT_X
-    bmi     55f
-5:  subs    VX, VX, SRC_WIDTH_FIXED
-    bpl     5b
-55:
-    add     TMP1, mem_operand, TMP1, lsl #2
-    ld1     {v&reg1&.s}[idx], [TMP1]
-.endif
-.endm
-
-.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand
-.if numbytes == 32
-    pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
-    pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
-    pixdeinterleave elem_size, %(basereg+4)
-.elseif numbytes == 16
-    pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
-.elseif numbytes == 8
-    pixld1_s elem_size, %(basereg+1), mem_operand
-.elseif numbytes == 4
-    .if elem_size == 32
-        pixld0_s elem_size, %(basereg+0), 1, mem_operand
-    .elseif elem_size == 16
-        pixld0_s elem_size, %(basereg+0), 2, mem_operand
-        pixld0_s elem_size, %(basereg+0), 3, mem_operand
-    .else
-        pixld0_s elem_size, %(basereg+0), 4, mem_operand
-        pixld0_s elem_size, %(basereg+0), 5, mem_operand
-        pixld0_s elem_size, %(basereg+0), 6, mem_operand
-        pixld0_s elem_size, %(basereg+0), 7, mem_operand
-    .endif
-.elseif numbytes == 2
-    .if elem_size == 16
-        pixld0_s elem_size, %(basereg+0), 1, mem_operand
-    .else
-        pixld0_s elem_size, %(basereg+0), 2, mem_operand
-        pixld0_s elem_size, %(basereg+0), 3, mem_operand
-    .endif
-.elseif numbytes == 1
-    pixld0_s elem_size, %(basereg+0), 1, mem_operand
-.else
-    .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld_s numpix, bpp, basereg, mem_operand
-.if bpp > 0
-    pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand
-.endif
-.endm
-
-.macro vuzp8 reg1, reg2
-    umov DUMMY, v16.d[0]
-    uzp1 v16.8b,     v&reg1&.8b, v&reg2&.8b
-    uzp2 v&reg2&.8b, v&reg1&.8b, v&reg2&.8b
-    mov  v&reg1&.8b, v16.8b
-    mov  v16.d[0], DUMMY
-.endm
-
-.macro vzip8 reg1, reg2
-    umov DUMMY, v16.d[0]
-    zip1 v16.8b,     v&reg1&.8b, v&reg2&.8b
-    zip2 v&reg2&.8b, v&reg1&.8b, v&reg2&.8b
-    mov  v&reg1&.8b, v16.8b
-    mov  v16.d[0], DUMMY
-.endm
-
-/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixdeinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    vuzp8 %(basereg+0), %(basereg+1)
-    vuzp8 %(basereg+2), %(basereg+3)
-    vuzp8 %(basereg+1), %(basereg+3)
-    vuzp8 %(basereg+0), %(basereg+2)
-.endif
-.endm
-
-/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
-    vzip8 %(basereg+0), %(basereg+2)
-    vzip8 %(basereg+1), %(basereg+3)
-    vzip8 %(basereg+2), %(basereg+3)
-    vzip8 %(basereg+0), %(basereg+1)
-.endif
-.endm
-
-/*
- * This is a macro for implementing cache preload. The main idea is that
- * cache preload logic is mostly independent from the rest of pixels
- * processing code. It starts at the top left pixel and moves forward
- * across pixels and can jump across scanlines. Prefetch distance is
- * handled in an 'incremental' way: it starts from 0 and advances to the
- * optimal distance over time. After reaching optimal prefetch distance,
- * it is kept constant. There are some checks which prevent prefetching
- * unneeded pixel lines below the image (but it still can prefetch a bit
- * more data on the right side of the image - not a big issue and may
- * be actually helpful when rendering text glyphs). Additional trick is
- * the use of LDR instruction for prefetch instead of PLD when moving to
- * the next line, the point is that we have a high chance of getting TLB
- * miss in this case, and PLD would be useless.
- *
- * This sounds like it may introduce a noticeable overhead (when working with
- * fully cached data). But in reality, due to having a separate pipeline and
- * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
- * execute simultaneously with NEON and be completely shadowed by it. Thus
- * we get no performance overhead at all (*). This looks like a very nice
- * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in software
- * for almost zero cost!
- *
- * (*) The overhead of the prefetcher is visible when running some trivial
- * pixels processing like simple copy. Anyway, having prefetch is a must
- * when working with the graphics data.
- */
-.macro PF a, x:vararg
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
-    a x
-.endif
-.endm
-
-.macro cache_preload std_increment, boost_increment
-.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
-.if std_increment != 0
-    PF add PF_X, PF_X, #std_increment
-.endif
-    PF tst PF_CTL, #0xF
-    PF beq 71f
-    PF add PF_X, PF_X, #boost_increment
-    PF sub PF_CTL, PF_CTL, #1
-71:
-    PF cmp PF_X, ORIG_W
-.if src_bpp_shift >= 0
-    PF lsl DUMMY, PF_X, #src_bpp_shift
-    PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
-.endif
-.if dst_r_bpp != 0
-    PF lsl DUMMY, PF_X, #dst_bpp_shift
-    PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
-.endif
-.if mask_bpp_shift >= 0
-    PF lsl DUMMY, PF_X, #mask_bpp_shift
-    PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
-.endif
-    PF ble 71f
-    PF sub PF_X, PF_X, ORIG_W
-    PF subs PF_CTL, PF_CTL, #0x10
-71:
-    PF ble 72f
-.if src_bpp_shift >= 0
-    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-    PF ldrsb DUMMY, [PF_SRC, DUMMY]
-    PF add PF_SRC, PF_SRC, #1
-.endif
-.if dst_r_bpp != 0
-    PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
-    PF ldrsb DUMMY, [PF_DST, DUMMY]
-    PF add PF_DST, PF_DST, #1
-.endif
-.if mask_bpp_shift >= 0
-    PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
-    PF ldrsb DUMMY, [PF_MASK, DUMMY]
-    PF add PF_MASK, PF_MASK, #1
-.endif
-72:
-.endif
-.endm
-
-.macro cache_preload_simple
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
-.if src_bpp > 0
-    prfm PREFETCH_MODE, [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
-.endif
-.if dst_r_bpp > 0
-    prfm PREFETCH_MODE, [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
-.endif
-.if mask_bpp > 0
-    prfm PREFETCH_MODE, [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
-.endif
-.endif
-.endm
-
-.macro fetch_mask_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-.endm
-
-/*
- * Macro which is used to process leading pixels until destination
- * pointer is properly aligned (at 16 bytes boundary). When destination
- * buffer uses 16bpp format, this is unnecessary, or even pointless.
- */
-.macro ensure_destination_ptr_alignment process_pixblock_head, \
-                                        process_pixblock_tail, \
-                                        process_pixblock_tail_head
-.if dst_w_bpp != 24
-    tst         DST_R, #0xF
-    beq         52f
-
-.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
-.irp lowbit, 1, 2, 4, 8, 16
-local skip1
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
-    tst         DST_R, #lowbit
-    beq         51f
-.endif
-    pixld_src   (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
-    pixld       (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
-.if dst_r_bpp > 0
-    pixld_a     (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
-.else
-    add         DST_R, DST_R, #lowbit
-.endif
-    PF add      PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
-    sub         W, W, #(lowbit * 8 / dst_w_bpp)
-51:
-.endif
-.endr
-.endif
-    pixdeinterleave src_bpp, src_basereg
-    pixdeinterleave mask_bpp, mask_basereg
-    pixdeinterleave dst_r_bpp, dst_r_basereg
-
-    process_pixblock_head
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-    process_pixblock_tail
-
-    pixinterleave dst_w_bpp, dst_w_basereg
-
-.irp lowbit, 1, 2, 4, 8, 16
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
-    tst         DST_W, #lowbit
-    beq         51f
-.endif
-.if src_bpp == 0 && mask_bpp == 0 && dst_r_bpp == 0
-    sub         W, W, #(lowbit * 8 / dst_w_bpp)
-.endif
-    pixst_a     (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
-51:
-.endif
-.endr
-.endif
-52:
-.endm
-
-/*
- * Special code for processing up to (pixblock_size - 1) remaining
- * trailing pixels. As SIMD processing performs operation on
- * pixblock_size pixels, anything smaller than this has to be loaded
- * and stored in a special way. Loading and storing of pixel data is
- * performed in such a way that we fill some 'slots' in the NEON
- * registers (some slots naturally are unused), then perform compositing
- * operation as usual. In the end, the data is taken from these 'slots'
- * and saved to memory.
- *
- * cache_preload_flag - allows to suppress prefetch if
- *                      set to 0
- * dst_aligned_flag   - selects whether destination buffer
- *                      is aligned
- */
-.macro process_trailing_pixels cache_preload_flag, \
-                               dst_aligned_flag, \
-                               process_pixblock_head, \
-                               process_pixblock_tail, \
-                               process_pixblock_tail_head
-    tst         W, #(pixblock_size - 1)
-    beq         52f
-.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
-    tst         W, #chunk_size
-    beq         51f
-    pixld_src   chunk_size, src_bpp, src_basereg, SRC
-    pixld       chunk_size, mask_bpp, mask_basereg, MASK
-.if dst_aligned_flag != 0
-    pixld_a     chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.else
-    pixld       chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.endif
-.if cache_preload_flag != 0
-    PF add      PF_X, PF_X, #chunk_size
-.endif
-51:
-.endif
-.endr
-.endif
-    pixdeinterleave src_bpp, src_basereg
-    pixdeinterleave mask_bpp, mask_basereg
-    pixdeinterleave dst_r_bpp, dst_r_basereg
-
-    process_pixblock_head
-.if cache_preload_flag != 0
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-.endif
-    process_pixblock_tail
-    pixinterleave dst_w_bpp, dst_w_basereg
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
-    tst         W, #chunk_size
-    beq         51f
-.if dst_aligned_flag != 0
-    pixst_a     chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.else
-    pixst       chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.endif
-51:
-.endif
-.endr
-52:
-.endm
-
-/*
- * Macro, which performs all the needed operations to switch to the next
- * scanline and start the next loop iteration unless all the scanlines
- * are already processed.
- */
-.macro advance_to_next_scanline start_of_loop_label
-    mov         W, ORIG_W
-    add         DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
-.if src_bpp != 0
-    add         SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift
-.endif
-.if mask_bpp != 0
-    add         MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
-.endif
-.if (dst_w_bpp != 24)
-    sub         DST_W, DST_W, W, lsl #dst_bpp_shift
-.endif
-.if (src_bpp != 24) && (src_bpp != 0)
-    sub         SRC, SRC, W, lsl #src_bpp_shift
-.endif
-.if (mask_bpp != 24) && (mask_bpp != 0)
-    sub         MASK, MASK, W, lsl #mask_bpp_shift
-.endif
-    subs        H, H, #1
-    mov         DST_R, DST_W
-    bge         start_of_loop_label
-.endm
-
-/*
- * Registers are allocated in the following way by default:
- * v0, v1, v2, v3     - reserved for loading source pixel data
- * v4, v5, v6, v7     - reserved for loading destination pixel data
- * v24, v25, v26, v27 - reserved for loading mask pixel data
- * v28, v29, v30, v31 - final destination pixel data for writeback to memory
- */
-.macro generate_composite_function fname, \
-                                   src_bpp_, \
-                                   mask_bpp_, \
-                                   dst_w_bpp_, \
-                                   flags, \
-                                   pixblock_size_, \
-                                   prefetch_distance, \
-                                   init, \
-                                   cleanup, \
-                                   process_pixblock_head, \
-                                   process_pixblock_tail, \
-                                   process_pixblock_tail_head, \
-                                   dst_w_basereg_ = 28, \
-                                   dst_r_basereg_ = 4, \
-                                   src_basereg_   = 0, \
-                                   mask_basereg_  = 24
-
-    pixman_asm_function fname
-    stp         x29, x30, [sp, -16]!
-    mov         x29, sp
-    sub         sp,   sp, 232  /* push all registers */
-    sub         x29, x29, 64
-    st1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32
-    st1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32
-    stp          x8,   x9, [x29, -80]
-    stp         x10,  x11, [x29, -96]
-    stp         x12,  x13, [x29, -112]
-    stp         x14,  x15, [x29, -128]
-    stp         x16,  x17, [x29, -144]
-    stp         x18,  x19, [x29, -160]
-    stp         x20,  x21, [x29, -176]
-    stp         x22,  x23, [x29, -192]
-    stp         x24,  x25, [x29, -208]
-    stp         x26,  x27, [x29, -224]
-    str         x28, [x29, -232]
-
-/*
- * Select prefetch type for this function. If prefetch distance is
- * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
- * has to be used instead of ADVANCED.
- */
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
-.if prefetch_distance == 0
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
-        ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
-.endif
-
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
-    .set src_bpp, src_bpp_
-    .set mask_bpp, mask_bpp_
-    .set dst_w_bpp, dst_w_bpp_
-    .set pixblock_size, pixblock_size_
-    .set dst_w_basereg, dst_w_basereg_
-    .set dst_r_basereg, dst_r_basereg_
-    .set src_basereg, src_basereg_
-    .set mask_basereg, mask_basereg_
-
-    .macro pixld_src x:vararg
-        pixld x
-    .endm
-    .macro fetch_src_pixblock
-        pixld_src   pixblock_size, src_bpp, \
-                    (src_basereg - pixblock_size * src_bpp / 64), SRC
-    .endm
-/*
- * Assign symbolic names to registers
- */
-    W           .req       x0      /* width (is updated during processing) */
-    H           .req       x1      /* height (is updated during processing) */
-    DST_W       .req       x2      /* destination buffer pointer for writes */
-    DST_STRIDE  .req       x3      /* destination image stride */
-    SRC         .req       x4      /* source buffer pointer */
-    SRC_STRIDE  .req       x5      /* source image stride */
-    MASK        .req       x6      /* mask pointer */
-    MASK_STRIDE .req       x7      /* mask stride */
-
-    DST_R       .req       x8      /* destination buffer pointer for reads */
-
-    PF_CTL      .req       x9      /* combined lines counter and prefetch */
-                                    /* distance increment counter */
-    PF_X        .req       x10     /* pixel index in a scanline for current */
-                                    /* pretetch position */
-    PF_SRC      .req       x11     /* pointer to source scanline start */
-                                    /* for prefetch purposes */
-    PF_DST      .req       x12     /* pointer to destination scanline start */
-                                    /* for prefetch purposes */
-    PF_MASK     .req       x13     /* pointer to mask scanline start */
-                                    /* for prefetch purposes */
-
-    ORIG_W      .req       x14     /* saved original width */
-    DUMMY       .req       x15     /* temporary register */
-
-    sxtw        x0, w0
-    sxtw        x1, w1
-    sxtw        x3, w3
-    sxtw        x5, w5
-    sxtw        x7, w7
-
-    .set mask_bpp_shift, -1
-.if src_bpp == 32
-    .set src_bpp_shift, 2
-.elseif src_bpp == 24
-    .set src_bpp_shift, 0
-.elseif src_bpp == 16
-    .set src_bpp_shift, 1
-.elseif src_bpp == 8
-    .set src_bpp_shift, 0
-.elseif src_bpp == 0
-    .set src_bpp_shift, -1
-.else
-    .error "requested src bpp (src_bpp) is not supported"
-.endif
-.if mask_bpp == 32
-    .set mask_bpp_shift, 2
-.elseif mask_bpp == 24
-    .set mask_bpp_shift, 0
-.elseif mask_bpp == 8
-    .set mask_bpp_shift, 0
-.elseif mask_bpp == 0
-    .set mask_bpp_shift, -1
-.else
-    .error "requested mask bpp (mask_bpp) is not supported"
-.endif
-.if dst_w_bpp == 32
-    .set dst_bpp_shift, 2
-.elseif dst_w_bpp == 24
-    .set dst_bpp_shift, 0
-.elseif dst_w_bpp == 16
-    .set dst_bpp_shift, 1
-.elseif dst_w_bpp == 8
-    .set dst_bpp_shift, 0
-.else
-    .error "requested dst bpp (dst_w_bpp) is not supported"
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
-    .set dst_r_bpp, dst_w_bpp
-.else
-    .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
-    .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
-    .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
-.if prefetch_distance < 0 || prefetch_distance > 15
-    .error "invalid prefetch distance (prefetch_distance)"
-.endif
-
-    PF mov      PF_X, #0
-    mov         DST_R, DST_W
-
-.if src_bpp == 24
-    sub         SRC_STRIDE, SRC_STRIDE, W
-    sub         SRC_STRIDE, SRC_STRIDE, W, lsl #1
-.endif
-.if mask_bpp == 24
-    sub         MASK_STRIDE, MASK_STRIDE, W
-    sub         MASK_STRIDE, MASK_STRIDE, W, lsl #1
-.endif
-.if dst_w_bpp == 24
-    sub         DST_STRIDE, DST_STRIDE, W
-    sub         DST_STRIDE, DST_STRIDE, W, lsl #1
-.endif
-
-/*
- * Setup advanced prefetcher initial state
- */
-    PF mov      PF_SRC, SRC
-    PF mov      PF_DST, DST_R
-    PF mov      PF_MASK, MASK
-    /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
-    PF lsl      DUMMY, H, #4
-    PF mov      PF_CTL, DUMMY
-    PF add      PF_CTL, PF_CTL, #(prefetch_distance - 0x10)
-
-    init
-    subs        H, H, #1
-    mov         ORIG_W, W
-    blt         9f
-    cmp         W, #(pixblock_size * 2)
-    blt         800f
-/*
- * This is the start of the pipelined loop, which if optimized for
- * long scanlines
- */
-0:
-    ensure_destination_ptr_alignment process_pixblock_head, \
-                                     process_pixblock_tail, \
-                                     process_pixblock_tail_head
-
-    /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
-    pixld_a     pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    PF add      PF_X, PF_X, #pixblock_size
-    process_pixblock_head
-    cache_preload 0, pixblock_size
-    cache_preload_simple
-    subs        W, W, #(pixblock_size * 2)
-    blt         200f
-
-100:
-    process_pixblock_tail_head
-    cache_preload_simple
-    subs        W, W, #pixblock_size
-    bge         100b
-
-200:
-    process_pixblock_tail
-    pixst_a     pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-
-    /* Process the remaining trailing pixels in the scanline */
-    process_trailing_pixels 1, 1, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-    advance_to_next_scanline 0b
-
-    cleanup
-1000:
-    /* pop all registers */
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp          x8,   x9, [x29, -80]
-    ldp         x10,  x11, [x29, -96]
-    ldp         x12,  x13, [x29, -112]
-    ldp         x14,  x15, [x29, -128]
-    ldp         x16,  x17, [x29, -144]
-    ldp         x18,  x19, [x29, -160]
-    ldp         x20,  x21, [x29, -176]
-    ldp         x22,  x23, [x29, -192]
-    ldp         x24,  x25, [x29, -208]
-    ldp         x26,  x27, [x29, -224]
-    ldr         x28, [x29, -232]
-    mov         sp, x29
-    ldp         x29, x30, [sp], 16
-    ret  /* exit */
-/*
- * This is the start of the loop, designed to process images with small width
- * (less than pixblock_size * 2 pixels). In this case neither pipelining
- * nor prefetch are used.
- */
-800:
-.if src_bpp_shift >= 0
-    PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
-    PF prfm PREFETCH_MODE, [SRC, DUMMY]
-.endif
-.if dst_r_bpp != 0
-    PF lsl  DUMMY, DST_STRIDE, #dst_bpp_shift
-    PF prfm PREFETCH_MODE, [DST_R, DUMMY]
-.endif
-.if mask_bpp_shift >= 0
-    PF lsl  DUMMY, MASK_STRIDE, #mask_bpp_shift
-    PF prfm PREFETCH_MODE, [MASK, DUMMY]
-.endif
-    /* Process exactly pixblock_size pixels if needed */
-    tst         W, #pixblock_size
-    beq         100f
-    pixld       pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    process_pixblock_head
-    process_pixblock_tail
-    pixst       pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-100:
-    /* Process the remaining trailing pixels in the scanline */
-    process_trailing_pixels 0, 0, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-    advance_to_next_scanline 800b
-9:
-    cleanup
-    /* pop all registers */
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp          x8,   x9, [x29, -80]
-    ldp         x10,  x11, [x29, -96]
-    ldp         x12,  x13, [x29, -112]
-    ldp         x14,  x15, [x29, -128]
-    ldp         x16,  x17, [x29, -144]
-    ldp         x18,  x19, [x29, -160]
-    ldp         x20,  x21, [x29, -176]
-    ldp         x22,  x23, [x29, -192]
-    ldp         x24,  x25, [x29, -208]
-    ldp         x26,  x27, [x29, -224]
-    ldr         x28, [x29, -232]
-    mov         sp, x29
-    ldp         x29, x30, [sp], 16
-    ret  /* exit */
-
-    .purgem     fetch_src_pixblock
-    .purgem     pixld_src
-
-    .unreq      SRC
-    .unreq      MASK
-    .unreq      DST_R
-    .unreq      DST_W
-    .unreq      ORIG_W
-    .unreq      W
-    .unreq      H
-    .unreq      SRC_STRIDE
-    .unreq      DST_STRIDE
-    .unreq      MASK_STRIDE
-    .unreq      PF_CTL
-    .unreq      PF_X
-    .unreq      PF_SRC
-    .unreq      PF_DST
-    .unreq      PF_MASK
-    .unreq      DUMMY
-    .endfunc
-.endm
-
-/*
- * A simplified variant of function generation template for a single
- * scanline processing (for implementing pixman combine functions)
- */
-.macro generate_composite_function_scanline        use_nearest_scaling, \
-                                                   fname, \
-                                                   src_bpp_, \
-                                                   mask_bpp_, \
-                                                   dst_w_bpp_, \
-                                                   flags, \
-                                                   pixblock_size_, \
-                                                   init, \
-                                                   cleanup, \
-                                                   process_pixblock_head, \
-                                                   process_pixblock_tail, \
-                                                   process_pixblock_tail_head, \
-                                                   dst_w_basereg_ = 28, \
-                                                   dst_r_basereg_ = 4, \
-                                                   src_basereg_   = 0, \
-                                                   mask_basereg_  = 24
-
-    pixman_asm_function fname
-    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
-    .set src_bpp, src_bpp_
-    .set mask_bpp, mask_bpp_
-    .set dst_w_bpp, dst_w_bpp_
-    .set pixblock_size, pixblock_size_
-    .set dst_w_basereg, dst_w_basereg_
-    .set dst_r_basereg, dst_r_basereg_
-    .set src_basereg, src_basereg_
-    .set mask_basereg, mask_basereg_
-
-.if use_nearest_scaling != 0
-    /*
-     * Assign symbolic names to registers for nearest scaling
-     */
-    W           .req        x0
-    DST_W       .req        x1
-    SRC         .req        x2
-    VX          .req        x3
-    UNIT_X      .req        x4
-    SRC_WIDTH_FIXED .req    x5
-    MASK        .req        x6
-    TMP1        .req        x8
-    TMP2        .req        x9
-    DST_R       .req        x10
-    DUMMY       .req        x30
-
-    .macro pixld_src x:vararg
-        pixld_s x
-    .endm
-
-    sxtw        x0, w0
-    sxtw        x3, w3
-    sxtw        x4, w4
-    sxtw        x5, w5
-
-    stp         x29, x30, [sp, -16]!
-    mov         x29, sp
-    sub         sp, sp, 88
-    sub         x29, x29, 64
-    st1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    st1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    stp         x8, x9, [x29, -80]
-    str         x10, [x29, -88]
-.else
-    /*
-     * Assign symbolic names to registers
-     */
-    W           .req        x0      /* width (is updated during processing) */
-    DST_W       .req        x1      /* destination buffer pointer for writes */
-    SRC         .req        x2      /* source buffer pointer */
-    MASK        .req        x3      /* mask pointer */
-    DST_R       .req        x4      /* destination buffer pointer for reads */
-    DUMMY       .req        x30
-
-    .macro pixld_src x:vararg
-        pixld x
-    .endm
-
-    sxtw        x0, w0
-
-    stp         x29, x30, [sp, -16]!
-    mov         x29, sp
-    sub         sp, sp, 64
-    sub         x29, x29, 64
-    st1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    st1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
-    .set dst_r_bpp, dst_w_bpp
-.else
-    .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
-    .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
-    .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
-    .macro fetch_src_pixblock
-        pixld_src   pixblock_size, src_bpp, \
-                    (src_basereg - pixblock_size * src_bpp / 64), SRC
-    .endm
-
-    init
-    mov         DST_R, DST_W
-
-    cmp         W, #pixblock_size
-    blt         800f
-
-    ensure_destination_ptr_alignment process_pixblock_head, \
-                                     process_pixblock_tail, \
-                                     process_pixblock_tail_head
-
-    subs        W, W, #pixblock_size
-    blt         700f
-
-    /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
-    pixld_a     pixblock_size, dst_r_bpp, \
-                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
-    fetch_src_pixblock
-    pixld       pixblock_size, mask_bpp, \
-                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-    process_pixblock_head
-    subs        W, W, #pixblock_size
-    blt         200f
-100:
-    process_pixblock_tail_head
-    subs        W, W, #pixblock_size
-    bge         100b
-200:
-    process_pixblock_tail
-    pixst_a     pixblock_size, dst_w_bpp, \
-                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-700:
-    /* Process the remaining trailing pixels in the scanline (dst aligned) */
-    process_trailing_pixels 0, 1, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-
-    cleanup
-.if use_nearest_scaling != 0
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp         x8, x9, [x29, -80]
-    ldr         x10, [x29, -96]
-    mov         sp, x29
-    ldp         x29, x30, [sp], 16
-    ret  /* exit */
-.else
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    mov         sp, x29
-    ldp         x29, x30, [sp], 16
-    ret  /* exit */
-.endif
-800:
-    /* Process the remaining trailing pixels in the scanline (dst unaligned) */
-    process_trailing_pixels 0, 0, \
-                            process_pixblock_head, \
-                            process_pixblock_tail, \
-                            process_pixblock_tail_head
-
-    cleanup
-.if use_nearest_scaling != 0
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    ldp         x8, x9, [x29, -80]
-    ldr         x10, [x29, -88]
-    mov         sp, x29
-    ldp         x29, x30, [sp], 16
-    ret  /* exit */
-
-    .unreq      DUMMY
-    .unreq      DST_R
-    .unreq      SRC
-    .unreq      W
-    .unreq      VX
-    .unreq      UNIT_X
-    .unreq      TMP1
-    .unreq      TMP2
-    .unreq      DST_W
-    .unreq      MASK
-    .unreq      SRC_WIDTH_FIXED
-
-.else
-    sub         x29, x29, 64
-    ld1         {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
-    ld1         {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
-    mov          sp, x29
-    ldp          x29, x30, [sp], 16
-    ret  /* exit */
-
-    .unreq      DUMMY
-    .unreq      SRC
-    .unreq      MASK
-    .unreq      DST_R
-    .unreq      DST_W
-    .unreq      W
-.endif
-
-    .purgem     fetch_src_pixblock
-    .purgem     pixld_src
-
-    .endfunc
-.endm
-
-.macro generate_composite_function_single_scanline x:vararg
-    generate_composite_function_scanline 0, x
-.endm
-
-.macro generate_composite_function_nearest_scanline x:vararg
-    generate_composite_function_scanline 1, x
-.endm
-
-/* Default prologue/epilogue, nothing special needs to be done */
-
-.macro default_init
-.endm
-
-.macro default_cleanup
-.endm
-
-/*
- * Prologue/epilogue variant which additionally saves/restores v8-v15
- * registers (they need to be saved/restored by callee according to ABI).
- * This is required if the code needs to use all the NEON registers.
- */
-
-.macro default_init_need_all_regs
-.endm
-
-.macro default_cleanup_need_all_regs
-.endm
-
-/******************************************************************************/
-
-/*
- * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in)
- * into a planar a8r8g8b8 format (with a, r, g, b color components
- * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
- *
- * Warning: the conversion is destructive and the original
- *          value (in) is lost.
- */
-.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
-    shrn        &out_r&.8b, &in&.8h,    #8
-    shrn        &out_g&.8b, &in&.8h,    #3
-    sli         &in&.8h,    &in&.8h,    #5
-    movi        &out_a&.8b, #255
-    sri         &out_r&.8b, &out_r&.8b, #5
-    sri         &out_g&.8b, &out_g&.8b, #6
-    shrn        &out_b&.8b, &in&.8h,    #2
-.endm
-
-.macro convert_0565_to_x888 in, out_r, out_g, out_b
-    shrn        &out_r&.8b, &in&.8h,    #8
-    shrn        &out_g&.8b, &in&.8h,    #3
-    sli         &in&.8h,    &in&.8h,    #5
-    sri         &out_r&.8b, &out_r&.8b, #5
-    sri         &out_g&.8b, &out_g&.8b, #6
-    shrn        &out_b&.8b, &in&.8h,    #2
-.endm
-
-/*
- * Conversion from planar a8r8g8b8 format (with a, r, g, b color components
- * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6
- * pixels packed in 128-bit register (out). Requires two temporary 128-bit
- * registers (tmp1, tmp2)
- */
-.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
-    ushll       &tmp1&.8h, &in_g&.8b, #7
-    shl         &tmp1&.8h, &tmp1&.8h, #1
-    ushll       &out&.8h,  &in_r&.8b, #7
-    shl         &out&.8h,  &out&.8h,  #1
-    ushll       &tmp2&.8h, &in_b&.8b, #7
-    shl         &tmp2&.8h, &tmp2&.8h, #1
-    sri         &out&.8h, &tmp1&.8h, #5
-    sri         &out&.8h, &tmp2&.8h, #11
-.endm
-
-/*
- * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
- * returned in (out0, out1) registers pair. Requires one temporary
- * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
- * value from 'in' is lost
- */
-.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
-    shl         &out0&.4h, &in&.4h,   #5  /* G top 6 bits */
-    shl         &tmp&.4h,  &in&.4h,   #11 /* B top 5 bits */
-    sri         &in&.4h,   &in&.4h,   #5  /* R is ready in top bits */
-    sri         &out0&.4h, &out0&.4h, #6  /* G is ready in top bits */
-    sri         &tmp&.4h,  &tmp&.4h,  #5  /* B is ready in top bits */
-    ushr        &out1&.4h, &in&.4h,   #8  /* R is in place */
-    sri         &out0&.4h, &tmp&.4h,  #8  /* G & B is in place */
-    zip1        &tmp&.4h,  &out0&.4h, &out1&.4h  /* everything is in place */
-    zip2        &out1&.4h, &out0&.4h, &out1&.4h
-    mov         &out0&.d[0], &tmp&.d[0]
-.endm
diff --git a/vendor/pixman/pixman/pixman-bits-image.c b/vendor/pixman/pixman/pixman-bits-image.c
deleted file mode 100644
index 1698d7309..000000000
--- a/vendor/pixman/pixman/pixman-bits-image.c
+++ /dev/null
@@ -1,1383 +0,0 @@
-/*
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *             2008 Aaron Plattner, NVIDIA Corporation
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007, 2009 Red Hat, Inc.
- * Copyright © 2008 André Tupinambá <andrelrt@gmail.com>
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-#include "dither/blue-noise-64x64.h"
-
-/* Fetch functions */
-
-static force_inline void
-fetch_pixel_no_alpha_32 (bits_image_t *image,
-			 int x, int y, pixman_bool_t check_bounds,
-			 void *out)
-{
-    uint32_t *ret = out;
-
-    if (check_bounds &&
-	(x < 0 || x >= image->width || y < 0 || y >= image->height))
-	*ret = 0;
-    else
-	*ret = image->fetch_pixel_32 (image, x, y);
-}
-
-static force_inline void
-fetch_pixel_no_alpha_float (bits_image_t *image,
-			    int x, int y, pixman_bool_t check_bounds,
-			    void *out)
-{
-    argb_t *ret = out;
-
-    if (check_bounds &&
-	(x < 0 || x >= image->width || y < 0 || y >= image->height))
-	ret->a = ret->r = ret->g = ret->b = 0.f;
-    else
-	*ret = image->fetch_pixel_float (image, x, y);
-}
-
-typedef void (* get_pixel_t) (bits_image_t *image,
-			      int x, int y, pixman_bool_t check_bounds, void *out);
-
-static force_inline void
-bits_image_fetch_pixel_nearest (bits_image_t   *image,
-				pixman_fixed_t  x,
-				pixman_fixed_t  y,
-				get_pixel_t	get_pixel,
-				void	       *out)
-{
-    int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
-    int y0 = pixman_fixed_to_int (y - pixman_fixed_e);
-
-    if (image->common.repeat != PIXMAN_REPEAT_NONE)
-    {
-	repeat (image->common.repeat, &x0, image->width);
-	repeat (image->common.repeat, &y0, image->height);
-
-	get_pixel (image, x0, y0, FALSE, out);
-    }
-    else
-    {
-	get_pixel (image, x0, y0, TRUE, out);
-    }
-}
-
-static force_inline void
-bits_image_fetch_pixel_bilinear_32 (bits_image_t   *image,
-				    pixman_fixed_t  x,
-				    pixman_fixed_t  y,
-				    get_pixel_t	    get_pixel,
-				    void	   *out)
-{
-    pixman_repeat_t repeat_mode = image->common.repeat;
-    int width = image->width;
-    int height = image->height;
-    int x1, y1, x2, y2;
-    uint32_t tl, tr, bl, br;
-    int32_t distx, disty;
-    uint32_t *ret = out;
-
-    x1 = x - pixman_fixed_1 / 2;
-    y1 = y - pixman_fixed_1 / 2;
-
-    distx = pixman_fixed_to_bilinear_weight (x1);
-    disty = pixman_fixed_to_bilinear_weight (y1);
-
-    x1 = pixman_fixed_to_int (x1);
-    y1 = pixman_fixed_to_int (y1);
-    x2 = x1 + 1;
-    y2 = y1 + 1;
-
-    if (repeat_mode != PIXMAN_REPEAT_NONE)
-    {
-	repeat (repeat_mode, &x1, width);
-	repeat (repeat_mode, &y1, height);
-	repeat (repeat_mode, &x2, width);
-	repeat (repeat_mode, &y2, height);
-
-	get_pixel (image, x1, y1, FALSE, &tl);
-	get_pixel (image, x2, y1, FALSE, &tr);
-	get_pixel (image, x1, y2, FALSE, &bl);
-	get_pixel (image, x2, y2, FALSE, &br);
-    }
-    else
-    {
-	get_pixel (image, x1, y1, TRUE, &tl);
-	get_pixel (image, x2, y1, TRUE, &tr);
-	get_pixel (image, x1, y2, TRUE, &bl);
-	get_pixel (image, x2, y2, TRUE, &br);
-    }
-
-    *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty);
-}
-
-static force_inline void
-bits_image_fetch_pixel_bilinear_float (bits_image_t   *image,
-				       pixman_fixed_t  x,
-				       pixman_fixed_t  y,
-				       get_pixel_t     get_pixel,
-				       void	      *out)
-{
-    pixman_repeat_t repeat_mode = image->common.repeat;
-    int width = image->width;
-    int height = image->height;
-    int x1, y1, x2, y2;
-    argb_t tl, tr, bl, br;
-    float distx, disty;
-    argb_t *ret = out;
-
-    x1 = x - pixman_fixed_1 / 2;
-    y1 = y - pixman_fixed_1 / 2;
-
-    distx = ((float)pixman_fixed_fraction(x1)) / 65536.f;
-    disty = ((float)pixman_fixed_fraction(y1)) / 65536.f;
-
-    x1 = pixman_fixed_to_int (x1);
-    y1 = pixman_fixed_to_int (y1);
-    x2 = x1 + 1;
-    y2 = y1 + 1;
-
-    if (repeat_mode != PIXMAN_REPEAT_NONE)
-    {
-	repeat (repeat_mode, &x1, width);
-	repeat (repeat_mode, &y1, height);
-	repeat (repeat_mode, &x2, width);
-	repeat (repeat_mode, &y2, height);
-
-	get_pixel (image, x1, y1, FALSE, &tl);
-	get_pixel (image, x2, y1, FALSE, &tr);
-	get_pixel (image, x1, y2, FALSE, &bl);
-	get_pixel (image, x2, y2, FALSE, &br);
-    }
-    else
-    {
-	get_pixel (image, x1, y1, TRUE, &tl);
-	get_pixel (image, x2, y1, TRUE, &tr);
-	get_pixel (image, x1, y2, TRUE, &bl);
-	get_pixel (image, x2, y2, TRUE, &br);
-    }
-
-    *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty);
-}
-
-static force_inline void accum_32(unsigned int *satot, unsigned int *srtot,
-				  unsigned int *sgtot, unsigned int *sbtot,
-				  const void *p, pixman_fixed_t f)
-{
-    uint32_t pixel = *(uint32_t *)p;
-
-    *srtot += (int)RED_8 (pixel) * f;
-    *sgtot += (int)GREEN_8 (pixel) * f;
-    *sbtot += (int)BLUE_8 (pixel) * f;
-    *satot += (int)ALPHA_8 (pixel) * f;
-}
-
-static force_inline void reduce_32(unsigned int satot, unsigned int srtot,
-				   unsigned int sgtot, unsigned int sbtot,
-                                   void *p)
-{
-    uint32_t *ret = p;
-
-    satot = (int32_t)(satot + 0x8000) / 65536;
-    srtot = (int32_t)(srtot + 0x8000) / 65536;
-    sgtot = (int32_t)(sgtot + 0x8000) / 65536;
-    sbtot = (int32_t)(sbtot + 0x8000) / 65536;
-
-    satot = CLIP ((int32_t)satot, 0, 0xff);
-    srtot = CLIP ((int32_t)srtot, 0, 0xff);
-    sgtot = CLIP ((int32_t)sgtot, 0, 0xff);
-    sbtot = CLIP ((int32_t)sbtot, 0, 0xff);
-
-    *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
-}
-
-static force_inline void accum_float(unsigned int *satot, unsigned int *srtot,
-				     unsigned int *sgtot, unsigned int *sbtot,
-				     const void *p, pixman_fixed_t f)
-{
-    const argb_t *pixel = p;
-
-    *satot += pixel->a * f;
-    *srtot += pixel->r * f;
-    *sgtot += pixel->g * f;
-    *sbtot += pixel->b * f;
-}
-
-static force_inline void reduce_float(unsigned int satot, unsigned int srtot,
-				      unsigned int sgtot, unsigned int sbtot,
-				      void *p)
-{
-    argb_t *ret = p;
-
-    ret->a = CLIP ((int32_t)satot / 65536.f, 0.f, 1.f);
-    ret->r = CLIP ((int32_t)srtot / 65536.f, 0.f, 1.f);
-    ret->g = CLIP ((int32_t)sgtot / 65536.f, 0.f, 1.f);
-    ret->b = CLIP ((int32_t)sbtot / 65536.f, 0.f, 1.f);
-}
-
-typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot,
-				     unsigned int *sgtot, unsigned int *sbtot,
-				     const void *pixel, pixman_fixed_t f);
-
-typedef void (* reduce_pixel_t) (unsigned int satot, unsigned int srtot,
-				 unsigned int sgtot, unsigned int sbtot,
-                                 void *out);
-
-static force_inline void
-bits_image_fetch_pixel_convolution (bits_image_t   *image,
-				    pixman_fixed_t  x,
-				    pixman_fixed_t  y,
-				    get_pixel_t     get_pixel,
-				    void	      *out,
-				    accumulate_pixel_t accum,
-				    reduce_pixel_t reduce)
-{
-    pixman_fixed_t *params = image->common.filter_params;
-    int x_off = (params[0] - pixman_fixed_1) >> 1;
-    int y_off = (params[1] - pixman_fixed_1) >> 1;
-    int32_t cwidth = pixman_fixed_to_int (params[0]);
-    int32_t cheight = pixman_fixed_to_int (params[1]);
-    int32_t i, j, x1, x2, y1, y2;
-    pixman_repeat_t repeat_mode = image->common.repeat;
-    int width = image->width;
-    int height = image->height;
-    unsigned int srtot, sgtot, sbtot, satot;
-
-    params += 2;
-
-    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
-    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
-    x2 = x1 + cwidth;
-    y2 = y1 + cheight;
-
-    srtot = sgtot = sbtot = satot = 0;
-
-    for (i = y1; i < y2; ++i)
-    {
-	for (j = x1; j < x2; ++j)
-	{
-	    int rx = j;
-	    int ry = i;
-
-	    pixman_fixed_t f = *params;
-
-	    if (f)
-	    {
-		/* Must be big enough to hold a argb_t */
-		argb_t pixel;
-
-		if (repeat_mode != PIXMAN_REPEAT_NONE)
-		{
-		    repeat (repeat_mode, &rx, width);
-		    repeat (repeat_mode, &ry, height);
-
-		    get_pixel (image, rx, ry, FALSE, &pixel);
-		}
-		else
-		{
-		    get_pixel (image, rx, ry, TRUE, &pixel);
-		}
-
-		accum (&satot, &srtot, &sgtot, &sbtot, &pixel, f);
-	    }
-
-	    params++;
-	}
-    }
-
-    reduce (satot, srtot, sgtot, sbtot, out);
-}
-
-static void
-bits_image_fetch_pixel_separable_convolution (bits_image_t  *image,
-					      pixman_fixed_t x,
-					      pixman_fixed_t y,
-					      get_pixel_t    get_pixel,
-					      void	    *out,
-					      accumulate_pixel_t accum,
-					      reduce_pixel_t     reduce)
-{
-    pixman_fixed_t *params = image->common.filter_params;
-    pixman_repeat_t repeat_mode = image->common.repeat;
-    int width = image->width;
-    int height = image->height;
-    int cwidth = pixman_fixed_to_int (params[0]);
-    int cheight = pixman_fixed_to_int (params[1]);
-    int x_phase_bits = pixman_fixed_to_int (params[2]);
-    int y_phase_bits = pixman_fixed_to_int (params[3]);
-    int x_phase_shift = 16 - x_phase_bits;
-    int y_phase_shift = 16 - y_phase_bits;
-    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
-    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
-    pixman_fixed_t *y_params;
-    unsigned int srtot, sgtot, sbtot, satot;
-    int32_t x1, x2, y1, y2;
-    int32_t px, py;
-    int i, j;
-
-    /* Round x and y to the middle of the closest phase before continuing. This
-     * ensures that the convolution matrix is aligned right, since it was
-     * positioned relative to a particular phase (and not relative to whatever
-     * exact fraction we happen to get here).
-     */
-    x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
-    y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
-
-    px = (x & 0xffff) >> x_phase_shift;
-    py = (y & 0xffff) >> y_phase_shift;
-
-    y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
-
-    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
-    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
-    x2 = x1 + cwidth;
-    y2 = y1 + cheight;
-
-    srtot = sgtot = sbtot = satot = 0;
-
-    for (i = y1; i < y2; ++i)
-    {
-        pixman_fixed_48_16_t fy = *y_params++;
-        pixman_fixed_t *x_params = params + 4 + px * cwidth;
-
-        if (fy)
-        {
-            for (j = x1; j < x2; ++j)
-            {
-                pixman_fixed_t fx = *x_params++;
-		int rx = j;
-		int ry = i;
-
-                if (fx)
-                {
-                    /* Must be big enough to hold a argb_t */
-                    argb_t pixel;
-                    pixman_fixed_t f;
-
-                    if (repeat_mode != PIXMAN_REPEAT_NONE)
-                    {
-                        repeat (repeat_mode, &rx, width);
-                        repeat (repeat_mode, &ry, height);
-
-                        get_pixel (image, rx, ry, FALSE, &pixel);
-                    }
-                    else
-                    {
-                        get_pixel (image, rx, ry, TRUE, &pixel);
-		    }
-
-                    f = (fy * fx + 0x8000) >> 16;
-
-		    accum(&satot, &srtot, &sgtot, &sbtot, &pixel, f);
-                }
-            }
-	}
-    }
-
-
-    reduce(satot, srtot, sgtot, sbtot, out);
-}
-
-static force_inline void
-bits_image_fetch_pixel_filtered (bits_image_t  *image,
-				 pixman_bool_t  wide,
-				 pixman_fixed_t x,
-				 pixman_fixed_t y,
-				 get_pixel_t    get_pixel,
-				 void          *out)
-{
-    switch (image->common.filter)
-    {
-    case PIXMAN_FILTER_NEAREST:
-    case PIXMAN_FILTER_FAST:
-	bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out);
-	break;
-
-    case PIXMAN_FILTER_BILINEAR:
-    case PIXMAN_FILTER_GOOD:
-    case PIXMAN_FILTER_BEST:
-	if (wide)
-	    bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out);
-	else
-	    bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out);
-	break;
-
-    case PIXMAN_FILTER_CONVOLUTION:
-	if (wide)
-	{
-	    bits_image_fetch_pixel_convolution (image, x, y,
-						get_pixel, out,
-						accum_float,
-						reduce_float);
-	}
-	else
-	{
-	    bits_image_fetch_pixel_convolution (image, x, y,
-						get_pixel, out,
-						accum_32, reduce_32);
-	}
-	break;
-
-    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-	if (wide)
-	{
-	    bits_image_fetch_pixel_separable_convolution (image, x, y,
-							  get_pixel, out,
-							  accum_float,
-							  reduce_float);
-	}
-	else
-	{
-	    bits_image_fetch_pixel_separable_convolution (image, x, y,
-							  get_pixel, out,
-							  accum_32, reduce_32);
-	}
-        break;
-
-    default:
-	assert (0);
-        break;
-    }
-}
-
-static uint32_t *
-__bits_image_fetch_affine_no_alpha (pixman_iter_t *  iter,
-				    pixman_bool_t    wide,
-				    const uint32_t * mask)
-{
-    pixman_image_t *image  = iter->image;
-    int             offset = iter->x;
-    int             line   = iter->y++;
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-
-    const uint32_t wide_zero[4] = {0};
-    pixman_fixed_t x, y;
-    pixman_fixed_t ux, uy;
-    pixman_vector_t v;
-    int i;
-    get_pixel_t get_pixel =
-	wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return iter->buffer;
-
-	ux = image->common.transform->matrix[0][0];
-	uy = image->common.transform->matrix[1][0];
-    }
-    else
-    {
-	ux = pixman_fixed_1;
-	uy = 0;
-    }
-
-    x = v.vector[0];
-    y = v.vector[1];
-
-    for (i = 0; i < width; ++i)
-    {
-	if (!mask || (!wide && mask[i]) ||
-	    (wide && memcmp(&mask[4 * i], wide_zero, 16) != 0))
-	{
-	    bits_image_fetch_pixel_filtered (
-		&image->bits, wide, x, y, get_pixel, buffer);
-	}
-
-	x += ux;
-	y += uy;
-	buffer += wide ? 4 : 1;
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-bits_image_fetch_affine_no_alpha_32 (pixman_iter_t  *iter,
-				     const uint32_t *mask)
-{
-    return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask);
-}
-
-static uint32_t *
-bits_image_fetch_affine_no_alpha_float (pixman_iter_t  *iter,
-					const uint32_t *mask)
-{
-    return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask);
-}
-
-/* General fetcher */
-static force_inline void
-fetch_pixel_general_32 (bits_image_t *image,
-			int x, int y, pixman_bool_t check_bounds,
-			void *out)
-{
-    uint32_t pixel, *ret = out;
-
-    if (check_bounds &&
-	(x < 0 || x >= image->width || y < 0 || y >= image->height))
-    {
-	*ret = 0;
-	return;
-    }
-
-    pixel = image->fetch_pixel_32 (image, x, y);
-
-    if (image->common.alpha_map)
-    {
-	uint32_t pixel_a;
-
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
-
-	if (x < 0 || x >= image->common.alpha_map->width ||
-	    y < 0 || y >= image->common.alpha_map->height)
-	{
-	    pixel_a = 0;
-	}
-	else
-	{
-	    pixel_a = image->common.alpha_map->fetch_pixel_32 (
-		image->common.alpha_map, x, y);
-
-	    pixel_a = ALPHA_8 (pixel_a);
-	}
-
-	pixel &= 0x00ffffff;
-	pixel |= (pixel_a << 24);
-    }
-
-    *ret = pixel;
-}
-
-static force_inline void
-fetch_pixel_general_float (bits_image_t *image,
-			int x, int y, pixman_bool_t check_bounds,
-			void *out)
-{
-    argb_t *ret = out;
-
-    if (check_bounds &&
-	(x < 0 || x >= image->width || y < 0 || y >= image->height))
-    {
-	ret->a = ret->r = ret->g = ret->b = 0;
-	return;
-    }
-
-    *ret = image->fetch_pixel_float (image, x, y);
-
-    if (image->common.alpha_map)
-    {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
-
-	if (x < 0 || x >= image->common.alpha_map->width ||
-	    y < 0 || y >= image->common.alpha_map->height)
-	{
-	    ret->a = 0.f;
-	}
-	else
-	{
-	    argb_t alpha;
-
-	    alpha = image->common.alpha_map->fetch_pixel_float (
-		    image->common.alpha_map, x, y);
-
-	    ret->a = alpha.a;
-	}
-    }
-}
-
-static uint32_t *
-__bits_image_fetch_general (pixman_iter_t  *iter,
-			    pixman_bool_t wide,
-			    const uint32_t *mask)
-{
-    pixman_image_t *image  = iter->image;
-    int             offset = iter->x;
-    int             line   = iter->y++;
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-    get_pixel_t     get_pixel =
-	wide ? fetch_pixel_general_float : fetch_pixel_general_32;
-
-    const uint32_t wide_zero[4] = {0};
-    pixman_fixed_t x, y, w;
-    pixman_fixed_t ux, uy, uw;
-    pixman_vector_t v;
-    int i;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return buffer;
-
-	ux = image->common.transform->matrix[0][0];
-	uy = image->common.transform->matrix[1][0];
-	uw = image->common.transform->matrix[2][0];
-    }
-    else
-    {
-	ux = pixman_fixed_1;
-	uy = 0;
-	uw = 0;
-    }
-
-    x = v.vector[0];
-    y = v.vector[1];
-    w = v.vector[2];
-
-    for (i = 0; i < width; ++i)
-    {
-	pixman_fixed_t x0, y0;
-
-	if (!mask || (!wide && mask[i]) ||
-	    (wide && memcmp(&mask[4 * i], wide_zero, 16) != 0))
-	{
-	    if (w != 0)
-	    {
-		x0 = ((uint64_t)x << 16) / w;
-		y0 = ((uint64_t)y << 16) / w;
-	    }
-	    else
-	    {
-		x0 = 0;
-		y0 = 0;
-	    }
-
-	    bits_image_fetch_pixel_filtered (
-		&image->bits, wide, x0, y0, get_pixel, buffer);
-	}
-
-	x += ux;
-	y += uy;
-	w += uw;
-	buffer += wide ? 4 : 1;
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-bits_image_fetch_general_32 (pixman_iter_t  *iter,
-			     const uint32_t *mask)
-{
-    return __bits_image_fetch_general(iter, FALSE, mask);
-}
-
-static uint32_t *
-bits_image_fetch_general_float (pixman_iter_t  *iter,
-				const uint32_t *mask)
-{
-    return __bits_image_fetch_general(iter, TRUE, mask);
-}
-
-static void
-replicate_pixel_32 (bits_image_t *   bits,
-		    int              x,
-		    int              y,
-		    int              width,
-		    uint32_t *       buffer)
-{
-    uint32_t color;
-    uint32_t *end;
-
-    color = bits->fetch_pixel_32 (bits, x, y);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void
-replicate_pixel_float (bits_image_t *   bits,
-		       int              x,
-		       int              y,
-		       int              width,
-		       uint32_t *       b)
-{
-    argb_t color;
-    argb_t *buffer = (argb_t *)b;
-    argb_t *end;
-
-    color = bits->fetch_pixel_float (bits, x, y);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void
-bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
-                                            pixman_bool_t wide,
-                                            int           x,
-                                            int           y,
-                                            int           width,
-                                            uint32_t *    buffer)
-{
-    uint32_t w;
-
-    if (y < 0 || y >= image->height)
-    {
-	memset (buffer, 0, width * (wide? sizeof (argb_t) : 4));
-	return;
-    }
-
-    if (x < 0)
-    {
-	w = MIN (width, -x);
-
-	memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4));
-
-	width -= w;
-	buffer += w * (wide? 4 : 1);
-	x += w;
-    }
-
-    if (x < image->width)
-    {
-	w = MIN (width, image->width - x);
-
-	if (wide)
-	    image->fetch_scanline_float (image, x, y, w, buffer, NULL);
-	else
-	    image->fetch_scanline_32 (image, x, y, w, buffer, NULL);
-
-	width -= w;
-	buffer += w * (wide? 4 : 1);
-	x += w;
-    }
-
-    memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4));
-}
-
-static void
-bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
-                                              pixman_bool_t wide,
-                                              int           x,
-                                              int           y,
-                                              int           width,
-                                              uint32_t *    buffer)
-{
-    uint32_t w;
-
-    while (y < 0)
-	y += image->height;
-
-    while (y >= image->height)
-	y -= image->height;
-
-    if (image->width == 1)
-    {
-	if (wide)
-	    replicate_pixel_float (image, 0, y, width, buffer);
-	else
-	    replicate_pixel_32 (image, 0, y, width, buffer);
-
-	return;
-    }
-
-    while (width)
-    {
-	while (x < 0)
-	    x += image->width;
-	while (x >= image->width)
-	    x -= image->width;
-
-	w = MIN (width, image->width - x);
-
-	if (wide)
-	    image->fetch_scanline_float (image, x, y, w, buffer, NULL);
-	else
-	    image->fetch_scanline_32 (image, x, y, w, buffer, NULL);
-
-	buffer += w * (wide? 4 : 1);
-	x += w;
-	width -= w;
-    }
-}
-
-static uint32_t *
-bits_image_fetch_untransformed_32 (pixman_iter_t * iter,
-				   const uint32_t *mask)
-{
-    pixman_image_t *image  = iter->image;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-
-    if (image->common.repeat == PIXMAN_REPEAT_NONE)
-    {
-	bits_image_fetch_untransformed_repeat_none (
-	    &image->bits, FALSE, x, y, width, buffer);
-    }
-    else
-    {
-	bits_image_fetch_untransformed_repeat_normal (
-	    &image->bits, FALSE, x, y, width, buffer);
-    }
-
-    iter->y++;
-    return buffer;
-}
-
-static uint32_t *
-bits_image_fetch_untransformed_float (pixman_iter_t * iter,
-				      const uint32_t *mask)
-{
-    pixman_image_t *image  = iter->image;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-
-    if (image->common.repeat == PIXMAN_REPEAT_NONE)
-    {
-	bits_image_fetch_untransformed_repeat_none (
-	    &image->bits, TRUE, x, y, width, buffer);
-    }
-    else
-    {
-	bits_image_fetch_untransformed_repeat_normal (
-	    &image->bits, TRUE, x, y, width, buffer);
-    }
-
-    iter->y++;
-    return buffer;
-}
-
-typedef struct
-{
-    pixman_format_code_t	format;
-    uint32_t			flags;
-    pixman_iter_get_scanline_t	get_scanline_32;
-    pixman_iter_get_scanline_t  get_scanline_float;
-} fetcher_info_t;
-
-static const fetcher_info_t fetcher_info[] =
-{
-    { PIXMAN_any,
-      (FAST_PATH_NO_ALPHA_MAP			|
-       FAST_PATH_ID_TRANSFORM			|
-       FAST_PATH_NO_CONVOLUTION_FILTER		|
-       FAST_PATH_NO_PAD_REPEAT			|
-       FAST_PATH_NO_REFLECT_REPEAT),
-      bits_image_fetch_untransformed_32,
-      bits_image_fetch_untransformed_float
-    },
-
-    /* Affine, no alpha */
-    { PIXMAN_any,
-      (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
-      bits_image_fetch_affine_no_alpha_32,
-      bits_image_fetch_affine_no_alpha_float,
-    },
-
-    /* General */
-    { PIXMAN_any,
-      0,
-      bits_image_fetch_general_32,
-      bits_image_fetch_general_float,
-    },
-
-    { PIXMAN_null },
-};
-
-static void
-bits_image_property_changed (pixman_image_t *image)
-{
-    _pixman_bits_image_setup_accessors (&image->bits);
-}
-
-void
-_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
-{
-    pixman_format_code_t format = image->common.extended_format_code;
-    uint32_t flags = image->common.flags;
-    const fetcher_info_t *info;
-
-    for (info = fetcher_info; info->format != PIXMAN_null; ++info)
-    {
-	if ((info->format == format || info->format == PIXMAN_any)	&&
-	    (info->flags & flags) == info->flags)
-	{
-	    if (iter->iter_flags & ITER_NARROW)
-	    {
-		iter->get_scanline = info->get_scanline_32;
-	    }
-	    else
-	    {
-		iter->get_scanline = info->get_scanline_float;
-	    }
-	    return;
-	}
-    }
-
-    /* Just in case we somehow didn't find a scanline function */
-    iter->get_scanline = _pixman_iter_get_scanline_noop;
-}
-
-static uint32_t *
-dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
-{
-    pixman_image_t *image  = iter->image;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    uint32_t *	    buffer = iter->buffer;
-
-    image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask);
-    if (image->common.alpha_map)
-    {
-	uint32_t *alpha;
-
-	if ((alpha = malloc (width * sizeof (uint32_t))))
-	{
-	    int i;
-
-	    x -= image->common.alpha_origin_x;
-	    y -= image->common.alpha_origin_y;
-
-	    image->common.alpha_map->fetch_scanline_32 (
-		image->common.alpha_map, x, y, width, alpha, mask);
-
-	    for (i = 0; i < width; ++i)
-	    {
-		buffer[i] &= ~0xff000000;
-		buffer[i] |= (alpha[i] & 0xff000000);
-	    }
-
-	    free (alpha);
-	}
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
-{
-    bits_image_t *  image  = &iter->image->bits;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    argb_t *	    buffer = (argb_t *)iter->buffer;
-
-    image->fetch_scanline_float (
-	image, x, y, width, (uint32_t *)buffer, mask);
-    if (image->common.alpha_map)
-    {
-	argb_t *alpha;
-
-	if ((alpha = malloc (width * sizeof (argb_t))))
-	{
-	    int i;
-
-	    x -= image->common.alpha_origin_x;
-	    y -= image->common.alpha_origin_y;
-
-	    image->common.alpha_map->fetch_scanline_float (
-		image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask);
-
-	    for (i = 0; i < width; ++i)
-		buffer[i].a = alpha[i].a;
-
-	    free (alpha);
-	}
-    }
-
-    return iter->buffer;
-}
-
-static void
-dest_write_back_narrow (pixman_iter_t *iter)
-{
-    bits_image_t *  image  = &iter->image->bits;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    const uint32_t *buffer = iter->buffer;
-
-    image->store_scanline_32 (image, x, y, width, buffer);
-
-    if (image->common.alpha_map)
-    {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
-
-	image->common.alpha_map->store_scanline_32 (
-	    image->common.alpha_map, x, y, width, buffer);
-    }
-
-    iter->y++;
-}
-
-static float
-dither_factor_blue_noise_64 (int x, int y)
-{
-    float m = dither_blue_noise_64x64[((y & 0x3f) << 6) | (x & 0x3f)];
-    return m * (1. / 4096.f) + (1. / 8192.f);
-}
-
-static float
-dither_factor_bayer_8 (int x, int y)
-{
-    uint32_t m;
-
-    y ^= x;
-
-    /* Compute reverse(interleave(xor(x mod n, y mod n), x mod n))
-     * Here n = 8 and `mod n` is the bottom 3 bits.
-     */
-    m = ((y & 0x1) << 5) | ((x & 0x1) << 4) |
-	((y & 0x2) << 2) | ((x & 0x2) << 1) |
-	((y & 0x4) >> 1) | ((x & 0x4) >> 2);
-
-    /* m is in range [0, 63].  We scale it to [0, 63.0f/64.0f], then
-     * shift it to to [1.0f/128.0f, 127.0f/128.0f] so that 0 < d < 1.
-     * This ensures exact values are not changed by dithering.
-     */
-    return (float)(m) * (1 / 64.0f) + (1.0f / 128.0f);
-}
-
-typedef float (* dither_factor_t)(int x, int y);
-
-static force_inline float
-dither_apply_channel (float f, float d, float s)
-{
-    /* float_to_unorm splits the [0, 1] segment in (1 << n_bits)
-     * subsections of equal length; however unorm_to_float does not
-     * map to the center of those sections.  In fact, pixel value u is
-     * mapped to:
-     *
-     *       u              u              u               1
-     * -------------- = ---------- + -------------- * ----------
-     *  2^n_bits - 1     2^n_bits     2^n_bits - 1     2^n_bits
-     *
-     * Hence if f = u / (2^n_bits - 1) is exactly representable on a
-     * n_bits palette, all the numbers between
-     *
-     *     u
-     * ----------  =  f - f * 2^n_bits = f + (0 - f) * 2^n_bits
-     *  2^n_bits
-     *
-     *  and
-     *
-     *    u + 1
-     * ---------- = f - (f - 1) * 2^n_bits = f + (1 - f) * 2^n_bits
-     *  2^n_bits
-     *
-     * are also mapped back to u.
-     *
-     * Hence the following calculation ensures that we add as much
-     * noise as possible without perturbing values which are exactly
-     * representable in the target colorspace.  Note that this corresponds to
-     * mixing the original color with noise with a ratio of `1 / 2^n_bits`.
-     */
-    return f + (d - f) * s;
-}
-
-static force_inline float
-dither_compute_scale (int n_bits)
-{
-    // No dithering for wide formats
-    if (n_bits == 0 || n_bits >= 32)
-	return 0.f;
-
-    return 1.f / (float)(1 << n_bits);
-}
-
-static const uint32_t *
-dither_apply_ordered (pixman_iter_t *iter, dither_factor_t factor)
-{
-    bits_image_t        *image  = &iter->image->bits;
-    int                  x      = iter->x + image->dither_offset_x;
-    int                  y      = iter->y + image->dither_offset_y;
-    int                  width  = iter->width;
-    argb_t              *buffer = (argb_t *)iter->buffer;
-
-    pixman_format_code_t format = image->format;
-    int                  a_size = PIXMAN_FORMAT_A (format);
-    int                  r_size = PIXMAN_FORMAT_R (format);
-    int                  g_size = PIXMAN_FORMAT_G (format);
-    int                  b_size = PIXMAN_FORMAT_B (format);
-
-    float a_scale = dither_compute_scale (a_size);
-    float r_scale = dither_compute_scale (r_size);
-    float g_scale = dither_compute_scale (g_size);
-    float b_scale = dither_compute_scale (b_size);
-
-    int   i;
-    float d;
-
-    for (i = 0; i < width; ++i)
-    {
-	d = factor (x + i, y);
-
-	buffer->a = dither_apply_channel (buffer->a, d, a_scale);
-	buffer->r = dither_apply_channel (buffer->r, d, r_scale);
-	buffer->g = dither_apply_channel (buffer->g, d, g_scale);
-	buffer->b = dither_apply_channel (buffer->b, d, b_scale);
-
-	buffer++;
-    }
-
-    return iter->buffer;
-}
-
-static void
-dest_write_back_wide (pixman_iter_t *iter)
-{
-    bits_image_t *  image  = &iter->image->bits;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    const uint32_t *buffer = iter->buffer;
-
-    switch (image->dither)
-    {
-    case PIXMAN_DITHER_NONE:
-	break;
-
-    case PIXMAN_DITHER_GOOD:
-    case PIXMAN_DITHER_BEST:
-    case PIXMAN_DITHER_ORDERED_BLUE_NOISE_64:
-	buffer = dither_apply_ordered (iter, dither_factor_blue_noise_64);
-	break;
-
-    case PIXMAN_DITHER_FAST:
-    case PIXMAN_DITHER_ORDERED_BAYER_8:
-	buffer = dither_apply_ordered (iter, dither_factor_bayer_8);
-	break;
-    }
-
-    image->store_scanline_float (image, x, y, width, buffer);
-
-    if (image->common.alpha_map)
-    {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
-
-	image->common.alpha_map->store_scanline_float (
-	    image->common.alpha_map, x, y, width, buffer);
-    }
-
-    iter->y++;
-}
-
-void
-_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
-{
-    if (iter->iter_flags & ITER_NARROW)
-    {
-	if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
-	    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
-	{
-	    iter->get_scanline = _pixman_iter_get_scanline_noop;
-	}
-	else
-	{
-	    iter->get_scanline = dest_get_scanline_narrow;
-	}
-	
-	iter->write_back = dest_write_back_narrow;
-    }
-    else
-    {
-	iter->get_scanline = dest_get_scanline_wide;
-	iter->write_back = dest_write_back_wide;
-    }
-}
-
-static uint32_t *
-create_bits (pixman_format_code_t format,
-             int                  width,
-             int                  height,
-             int *		  rowstride_bytes,
-	     pixman_bool_t	  clear)
-{
-    int stride;
-    size_t buf_size;
-    int bpp;
-
-    /* what follows is a long-winded way, avoiding any possibility of integer
-     * overflows, of saying:
-     * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t);
-     */
-
-    bpp = PIXMAN_FORMAT_BPP (format);
-    if (_pixman_multiply_overflows_int (width, bpp))
-	return NULL;
-
-    stride = width * bpp;
-    if (_pixman_addition_overflows_int (stride, 0x1f))
-	return NULL;
-
-    stride += 0x1f;
-    stride >>= 5;
-
-    stride *= sizeof (uint32_t);
-
-    if (_pixman_multiply_overflows_size (height, stride))
-	return NULL;
-
-    buf_size = (size_t)height * stride;
-
-    if (rowstride_bytes)
-	*rowstride_bytes = stride;
-
-    if (clear)
-	return calloc (buf_size, 1);
-    else
-	return malloc (buf_size);
-}
-
-pixman_bool_t
-_pixman_bits_image_init (pixman_image_t *     image,
-                         pixman_format_code_t format,
-                         int                  width,
-                         int                  height,
-                         uint32_t *           bits,
-                         int                  rowstride,
-			 pixman_bool_t	      clear)
-{
-    uint32_t *free_me = NULL;
-
-    if (PIXMAN_FORMAT_BPP (format) == 128)
-	return_val_if_fail(!(rowstride % 4), FALSE);
-
-    if (!bits && width && height)
-    {
-	int rowstride_bytes;
-
-	free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear);
-
-	if (!bits)
-	    return FALSE;
-
-	rowstride = rowstride_bytes / (int) sizeof (uint32_t);
-    }
-
-    _pixman_image_init (image);
-
-    image->type = BITS;
-    image->bits.format = format;
-    image->bits.width = width;
-    image->bits.height = height;
-    image->bits.bits = bits;
-    image->bits.free_me = free_me;
-    image->bits.dither = PIXMAN_DITHER_NONE;
-    image->bits.dither_offset_x = 0;
-    image->bits.dither_offset_y = 0;
-    image->bits.read_func = NULL;
-    image->bits.write_func = NULL;
-    image->bits.rowstride = rowstride;
-    image->bits.indexed = NULL;
-
-    image->common.property_changed = bits_image_property_changed;
-
-    _pixman_image_reset_clip_region (image);
-
-    return TRUE;
-}
-
-static pixman_image_t *
-create_bits_image_internal (pixman_format_code_t format,
-			    int                  width,
-			    int                  height,
-			    uint32_t *           bits,
-			    int                  rowstride_bytes,
-			    pixman_bool_t	 clear)
-{
-    pixman_image_t *image;
-
-    /* must be a whole number of uint32_t's
-     */
-    return_val_if_fail (
-	bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
-
-    return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
-
-    image = _pixman_image_allocate ();
-
-    if (!image)
-	return NULL;
-
-    if (!_pixman_bits_image_init (image, format, width, height, bits,
-				  rowstride_bytes / (int) sizeof (uint32_t),
-				  clear))
-    {
-	free (image);
-	return NULL;
-    }
-
-    return image;
-}
-
-/* If bits is NULL, a buffer will be allocated and initialized to 0 */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_bits (pixman_format_code_t format,
-                          int                  width,
-                          int                  height,
-                          uint32_t *           bits,
-                          int                  rowstride_bytes)
-{
-    return create_bits_image_internal (
-	format, width, height, bits, rowstride_bytes, TRUE);
-}
-
-
-/* If bits is NULL, a buffer will be allocated and _not_ initialized */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_bits_no_clear (pixman_format_code_t format,
-				   int                  width,
-				   int                  height,
-				   uint32_t *           bits,
-				   int                  rowstride_bytes)
-{
-    return create_bits_image_internal (
-	format, width, height, bits, rowstride_bytes, FALSE);
-}
diff --git a/vendor/pixman/pixman/pixman-combine-float.c b/vendor/pixman/pixman/pixman-combine-float.c
deleted file mode 100644
index 27392d608..000000000
--- a/vendor/pixman/pixman/pixman-combine-float.c
+++ /dev/null
@@ -1,1158 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2010, 2012 Soren Sandmann Pedersen
- * Copyright © 2010, 2012 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Soren Sandmann Pedersen (sandmann@cs.au.dk)
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-#include <float.h>
-
-#include "pixman-private.h"
-
-/* Workaround for http://gcc.gnu.org/PR54965 */
-/* GCC 4.6 has problems with force_inline, so just use normal inline instead */
-#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
-#undef force_inline
-#define force_inline __inline__
-#endif
-
-typedef float (* combine_channel_t) (float sa, float s, float da, float d);
-
-static force_inline void
-combine_inner (pixman_bool_t component,
-	       float *dest, const float *src, const float *mask, int n_pixels,
-	       combine_channel_t combine_a, combine_channel_t combine_c)
-{
-    int i;
-
-    if (!mask)
-    {
-	for (i = 0; i < 4 * n_pixels; i += 4)
-	{
-	    float sa = src[i + 0];
-	    float sr = src[i + 1];
-	    float sg = src[i + 2];
-	    float sb = src[i + 3];
-	    
-	    float da = dest[i + 0];
-	    float dr = dest[i + 1];
-	    float dg = dest[i + 2];
-	    float db = dest[i + 3];					
-	    
-	    dest[i + 0] = combine_a (sa, sa, da, da);
-	    dest[i + 1] = combine_c (sa, sr, da, dr);
-	    dest[i + 2] = combine_c (sa, sg, da, dg);
-	    dest[i + 3] = combine_c (sa, sb, da, db);
-	}
-    }
-    else
-    {
-	for (i = 0; i < 4 * n_pixels; i += 4)
-	{
-	    float sa, sr, sg, sb;
-	    float ma, mr, mg, mb;
-	    float da, dr, dg, db;
-	    
-	    sa = src[i + 0];
-	    sr = src[i + 1];
-	    sg = src[i + 2];
-	    sb = src[i + 3];
-	    
-	    if (component)
-	    {
-		ma = mask[i + 0];
-		mr = mask[i + 1];
-		mg = mask[i + 2];
-		mb = mask[i + 3];
-
-		sr *= mr;
-		sg *= mg;
-		sb *= mb;
-
-		ma *= sa;
-		mr *= sa;
-		mg *= sa;
-		mb *= sa;
-		
-		sa = ma;
-	    }
-	    else
-	    {
-		ma = mask[i + 0];
-
-		sa *= ma;
-		sr *= ma;
-		sg *= ma;
-		sb *= ma;
-
-		ma = mr = mg = mb = sa;
-	    }
-	    
-	    da = dest[i + 0];
-	    dr = dest[i + 1];
-	    dg = dest[i + 2];
-	    db = dest[i + 3];
-	    
-	    dest[i + 0] = combine_a (ma, sa, da, da);
-	    dest[i + 1] = combine_c (mr, sr, da, dr);
-	    dest[i + 2] = combine_c (mg, sg, da, dg);
-	    dest[i + 3] = combine_c (mb, sb, da, db);
-	}
-    }
-}
-
-#define MAKE_COMBINER(name, component, combine_a, combine_c)		\
-    static void								\
-    combine_ ## name ## _float (pixman_implementation_t *imp,		\
-				pixman_op_t              op,		\
-				float                   *dest,		\
-				const float             *src,		\
-				const float             *mask,		\
-				int		         n_pixels)	\
-    {									\
-	combine_inner (component, dest, src, mask, n_pixels,		\
-		       combine_a, combine_c);				\
-    }
-
-#define MAKE_COMBINERS(name, combine_a, combine_c)			\
-    MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c)		\
-    MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c)
-
-
-/*
- * Porter/Duff operators
- */
-typedef enum
-{
-    ZERO,
-    ONE,
-    SRC_ALPHA,
-    DEST_ALPHA,
-    INV_SA,
-    INV_DA,
-    SA_OVER_DA,
-    DA_OVER_SA,
-    INV_SA_OVER_DA,
-    INV_DA_OVER_SA,
-    ONE_MINUS_SA_OVER_DA,
-    ONE_MINUS_DA_OVER_SA,
-    ONE_MINUS_INV_DA_OVER_SA,
-    ONE_MINUS_INV_SA_OVER_DA
-} combine_factor_t;
-
-#define CLAMP(f)					\
-    (((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f)))
-
-static force_inline float
-get_factor (combine_factor_t factor, float sa, float da)
-{
-    float f = -1;
-
-    switch (factor)
-    {
-    case ZERO:
-	f = 0.0f;
-	break;
-
-    case ONE:
-	f = 1.0f;
-	break;
-
-    case SRC_ALPHA:
-	f = sa;
-	break;
-
-    case DEST_ALPHA:
-	f = da;
-	break;
-
-    case INV_SA:
-	f = 1 - sa;
-	break;
-
-    case INV_DA:
-	f = 1 - da;
-	break;
-
-    case SA_OVER_DA:
-	if (FLOAT_IS_ZERO (da))
-	    f = 1.0f;
-	else
-	    f = CLAMP (sa / da);
-	break;
-
-    case DA_OVER_SA:
-	if (FLOAT_IS_ZERO (sa))
-	    f = 1.0f;
-	else
-	    f = CLAMP (da / sa);
-	break;
-
-    case INV_SA_OVER_DA:
-	if (FLOAT_IS_ZERO (da))
-	    f = 1.0f;
-	else
-	    f = CLAMP ((1.0f - sa) / da);
-	break;
-
-    case INV_DA_OVER_SA:
-	if (FLOAT_IS_ZERO (sa))
-	    f = 1.0f;
-	else
-	    f = CLAMP ((1.0f - da) / sa);
-	break;
-
-    case ONE_MINUS_SA_OVER_DA:
-	if (FLOAT_IS_ZERO (da))
-	    f = 0.0f;
-	else
-	    f = CLAMP (1.0f - sa / da);
-	break;
-
-    case ONE_MINUS_DA_OVER_SA:
-	if (FLOAT_IS_ZERO (sa))
-	    f = 0.0f;
-	else
-	    f = CLAMP (1.0f - da / sa);
-	break;
-
-    case ONE_MINUS_INV_DA_OVER_SA:
-	if (FLOAT_IS_ZERO (sa))
-	    f = 0.0f;
-	else
-	    f = CLAMP (1.0f - (1.0f - da) / sa);
-	break;
-
-    case ONE_MINUS_INV_SA_OVER_DA:
-	if (FLOAT_IS_ZERO (da))
-	    f = 0.0f;
-	else
-	    f = CLAMP (1.0f - (1.0f - sa) / da);
-	break;
-    }
-
-    return f;
-}
-
-#define MAKE_PD_COMBINERS(name, a, b)					\
-    static float force_inline						\
-    pd_combine_ ## name (float sa, float s, float da, float d)		\
-    {									\
-	const float fa = get_factor (a, sa, da);			\
-	const float fb = get_factor (b, sa, da);			\
-									\
-	return MIN (1.0f, s * fa + d * fb);				\
-    }									\
-    									\
-    MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name)
-
-MAKE_PD_COMBINERS (clear,			ZERO,				ZERO)
-MAKE_PD_COMBINERS (src,				ONE,				ZERO)
-MAKE_PD_COMBINERS (dst,				ZERO,				ONE)
-MAKE_PD_COMBINERS (over,			ONE,				INV_SA)
-MAKE_PD_COMBINERS (over_reverse,		INV_DA,				ONE)
-MAKE_PD_COMBINERS (in,				DEST_ALPHA,			ZERO)
-MAKE_PD_COMBINERS (in_reverse,			ZERO,				SRC_ALPHA)
-MAKE_PD_COMBINERS (out,				INV_DA,				ZERO)
-MAKE_PD_COMBINERS (out_reverse,			ZERO,				INV_SA)
-MAKE_PD_COMBINERS (atop,			DEST_ALPHA,			INV_SA)
-MAKE_PD_COMBINERS (atop_reverse,		INV_DA,				SRC_ALPHA)
-MAKE_PD_COMBINERS (xor,				INV_DA,				INV_SA)
-MAKE_PD_COMBINERS (add,				ONE,				ONE)
-
-MAKE_PD_COMBINERS (saturate,			INV_DA_OVER_SA,			ONE)
-
-MAKE_PD_COMBINERS (disjoint_clear,		ZERO,				ZERO)
-MAKE_PD_COMBINERS (disjoint_src,		ONE,				ZERO)
-MAKE_PD_COMBINERS (disjoint_dst,		ZERO,				ONE)
-MAKE_PD_COMBINERS (disjoint_over,		ONE,				INV_SA_OVER_DA)
-MAKE_PD_COMBINERS (disjoint_over_reverse,	INV_DA_OVER_SA,			ONE)
-MAKE_PD_COMBINERS (disjoint_in,			ONE_MINUS_INV_DA_OVER_SA,	ZERO)
-MAKE_PD_COMBINERS (disjoint_in_reverse,		ZERO,				ONE_MINUS_INV_SA_OVER_DA)
-MAKE_PD_COMBINERS (disjoint_out,		INV_DA_OVER_SA,			ZERO)
-MAKE_PD_COMBINERS (disjoint_out_reverse,	ZERO,				INV_SA_OVER_DA)
-MAKE_PD_COMBINERS (disjoint_atop,		ONE_MINUS_INV_DA_OVER_SA,	INV_SA_OVER_DA)
-MAKE_PD_COMBINERS (disjoint_atop_reverse,	INV_DA_OVER_SA,			ONE_MINUS_INV_SA_OVER_DA)
-MAKE_PD_COMBINERS (disjoint_xor,		INV_DA_OVER_SA,			INV_SA_OVER_DA)
-
-MAKE_PD_COMBINERS (conjoint_clear,		ZERO,				ZERO)
-MAKE_PD_COMBINERS (conjoint_src,		ONE,				ZERO)
-MAKE_PD_COMBINERS (conjoint_dst,		ZERO,				ONE)
-MAKE_PD_COMBINERS (conjoint_over,		ONE,				ONE_MINUS_SA_OVER_DA)
-MAKE_PD_COMBINERS (conjoint_over_reverse,	ONE_MINUS_DA_OVER_SA,		ONE)
-MAKE_PD_COMBINERS (conjoint_in,			DA_OVER_SA,			ZERO)
-MAKE_PD_COMBINERS (conjoint_in_reverse,		ZERO,				SA_OVER_DA)
-MAKE_PD_COMBINERS (conjoint_out,		ONE_MINUS_DA_OVER_SA,		ZERO)
-MAKE_PD_COMBINERS (conjoint_out_reverse,	ZERO,				ONE_MINUS_SA_OVER_DA)
-MAKE_PD_COMBINERS (conjoint_atop,		DA_OVER_SA,			ONE_MINUS_SA_OVER_DA)
-MAKE_PD_COMBINERS (conjoint_atop_reverse,	ONE_MINUS_DA_OVER_SA,		SA_OVER_DA)
-MAKE_PD_COMBINERS (conjoint_xor,		ONE_MINUS_DA_OVER_SA,		ONE_MINUS_SA_OVER_DA)
-
-/*
- * PDF blend modes:
- *
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- *
- *     http://www.adobe.com/devnet/pdf/pdf_reference.html
- *
- * The specific documents of interest are the PDF spec itself:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
- *
- * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
- * 9.1 and Reader 9.1:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
- *
- * that clarifies the specifications for blend modes ColorDodge and
- * ColorBurn.
- *
- * The formula for computing the final pixel color given in 11.3.6 is:
- *
- *     αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- *
- * with B() is the blend function. When B(Cb, Cs) = Cs, this formula
- * reduces to the regular OVER operator.
- *
- * Cs and Cb are not premultiplied, so in our implementation we instead
- * use:
- *
- *     cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
- *
- * where cr, cs, and cb are premultiplied colors, and where the
- *
- *     αb × αs × B(cb/αb, cs/αs)
- *
- * part is first arithmetically simplified under the assumption that αb
- * and αs are not 0, and then updated to produce a meaningful result when
- * they are.
- *
- * For all the blend mode operators, the alpha channel is given by
- *
- *     αr = αs + αb + αb × αs
- */
-
-#define MAKE_SEPARABLE_PDF_COMBINERS(name)				\
-    static force_inline float						\
-    combine_ ## name ## _a (float sa, float s, float da, float d)	\
-    {									\
-	return da + sa - da * sa;					\
-    }									\
-    									\
-    static force_inline float						\
-    combine_ ## name ## _c (float sa, float s, float da, float d)	\
-    {									\
-	float f = (1 - sa) * d + (1 - da) * s;				\
-									\
-	return f + blend_ ## name (sa, s, da, d);			\
-    }									\
-    									\
-    MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
-
-/*
- * Multiply
- *
- *      ad * as * B(d / ad, s / as)
- *    = ad * as * d/ad * s/as
- *    = d * s
- *
- */
-static force_inline float
-blend_multiply (float sa, float s, float da, float d)
-{
-    return d * s;
-}
-
-/*
- * Screen
- *
- *      ad * as * B(d/ad, s/as)
- *    = ad * as * (d/ad + s/as - s/as * d/ad)
- *    = ad * s + as * d - s * d
- */
-static force_inline float
-blend_screen (float sa, float s, float da, float d)
-{
-    return d * sa + s * da - s * d;
-}
-
-/*
- * Overlay
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * Hardlight (s, d)
- *   = if (d / ad < 0.5)
- *         as * ad * Multiply (s/as, 2 * d/ad)
- *     else
- *         as * ad * Screen (s/as, 2 * d / ad - 1)
- *   = if (d < 0.5 * ad)
- *         as * ad * s/as * 2 * d /ad
- *     else
- *         as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static force_inline float
-blend_overlay (float sa, float s, float da, float d)
-{
-    if (2 * d < da)
-	return 2 * s * d;
-    else
-	return sa * da - 2 * (da - d) * (sa - s);
-}
-
-/*
- * Darken
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MIN(d/ad, s/as)
- *   = MIN (as * d, ad * s)
- */
-static force_inline float
-blend_darken (float sa, float s, float da, float d)
-{
-    s = s * da;
-    d = d * sa;
-
-    if (s > d)
-	return d;
-    else
-	return s;
-}
-
-/*
- * Lighten
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MAX(d/ad, s/as)
- *   = MAX (as * d, ad * s)
- */
-static force_inline float
-blend_lighten (float sa, float s, float da, float d)
-{
-    s = s * da;
-    d = d * sa;
-
-    if (s > d)
-	return s;
-    else
-	return d;
-}
-
-/*
- * Color dodge
- *
- *     ad * as * B(d/ad, s/as)
- *   = if d/ad = 0
- *         ad * as * 0
- *     else if (d/ad >= (1 - s/as)
- *         ad * as * 1
- *     else
- *         ad * as * ((d/ad) / (1 - s/as))
- *   = if d = 0
- *         0
- *     elif as * d >= ad * (as - s)
- *         ad * as
- *     else
- *         as * (as * d / (as - s))
- *
- */
-static force_inline float
-blend_color_dodge (float sa, float s, float da, float d)
-{
-    if (FLOAT_IS_ZERO (d))
-	return 0.0f;
-    else if (d * sa >= sa * da - s * da)
-	return sa * da;
-    else if (FLOAT_IS_ZERO (sa - s))
-	return sa * da;
-    else
-	return sa * sa * d / (sa - s);
-}
-
-/*
- * Color burn
- *
- * We modify the first clause "if d = 1" to "if d >= 1" since with
- * premultiplied colors d > 1 can actually happen.
- *
- *     ad * as * B(d/ad, s/as)
- *   = if d/ad >= 1
- *         ad * as * 1
- *     elif (1 - d/ad) >= s/as
- *         ad * as * 0
- *     else
- *         ad * as * (1 - ((1 - d/ad) / (s/as)))
- *   = if d >= ad
- *         ad * as
- *     elif as * ad - as * d >= ad * s
- *         0
- *     else
- *         ad * as  - as * as * (ad - d) / s
- */
-static force_inline float
-blend_color_burn (float sa, float s, float da, float d)
-{
-    if (d >= da)
-	return sa * da;
-    else if (sa * (da - d) >= s * da)
-	return 0.0f;
-    else if (FLOAT_IS_ZERO (s))
-	return 0.0f;
-    else
-	return sa * (da - sa * (da - d) / s);
-}
-
-/*
- * Hard light
- *
- *     ad * as * B(d/ad, s/as)
- *   = if (s/as <= 0.5)
- *         ad * as * Multiply (d/ad, 2 * s/as)
- *     else
- *         ad * as * Screen (d/ad, 2 * s/as - 1)
- *   = if 2 * s <= as
- *         ad * as * d/ad * 2 * s / as
- *     else
- *         ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1))
- *   = if 2 * s <= as
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static force_inline float
-blend_hard_light (float sa, float s, float da, float d)
-{
-    if (2 * s < sa)
-	return 2 * s * d;
-    else
-	return sa * da - 2 * (da - d) * (sa - s);
-}
-
-/*
- * Soft light
- *
- *     ad * as * B(d/ad, s/as)
- *   = if (s/as <= 0.5)
- *         ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad))
- *     else if (d/ad <= 0.25)
- *         ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad))
- *     else
- *         ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad))
- *   = if (2 * s <= as)
- *         d * as - d * (ad - d) * (as - 2 * s) / ad;
- *     else if (4 * d <= ad)
- *         (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
- *     else
- *         d * as + (sqrt (d * ad) - d) * (2 * s - as);
- */
-static force_inline float
-blend_soft_light (float sa, float s, float da, float d)
-{
-    if (2 * s <= sa)
-    {
-	if (FLOAT_IS_ZERO (da))
-	    return d * sa;
-	else
-	    return d * sa - d * (da - d) * (sa - 2 * s) / da;
-    }
-    else
-    {
-	if (FLOAT_IS_ZERO (da))
-	{
-	    return d * sa;
-	}
-	else
-	{
-	    if (4 * d <= da)
-		return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3);
-	    else
-		return d * sa + (sqrtf (d * da) - d) * (2 * s - sa);
-	}
-    }
-}
-
-/*
- * Difference
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * abs (s/as - d/ad)
- *   = if (s/as <= d/ad)
- *         ad * as * (d/ad - s/as)
- *     else
- *         ad * as * (s/as - d/ad)
- *   = if (ad * s <= as * d)
- *        as * d - ad * s
- *     else
- *        ad * s - as * d
- */
-static force_inline float
-blend_difference (float sa, float s, float da, float d)
-{
-    float dsa = d * sa;
-    float sda = s * da;
-
-    if (sda < dsa)
-	return dsa - sda;
-    else
-	return sda - dsa;
-}
-
-/*
- * Exclusion
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
- *   = as * d + ad * s - 2 * s * d
- */
-static force_inline float
-blend_exclusion (float sa, float s, float da, float d)
-{
-    return s * da + d * sa - 2 * d * s;
-}
-
-MAKE_SEPARABLE_PDF_COMBINERS (multiply)
-MAKE_SEPARABLE_PDF_COMBINERS (screen)
-MAKE_SEPARABLE_PDF_COMBINERS (overlay)
-MAKE_SEPARABLE_PDF_COMBINERS (darken)
-MAKE_SEPARABLE_PDF_COMBINERS (lighten)
-MAKE_SEPARABLE_PDF_COMBINERS (color_dodge)
-MAKE_SEPARABLE_PDF_COMBINERS (color_burn)
-MAKE_SEPARABLE_PDF_COMBINERS (hard_light)
-MAKE_SEPARABLE_PDF_COMBINERS (soft_light)
-MAKE_SEPARABLE_PDF_COMBINERS (difference)
-MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
-
-/*
- * PDF nonseperable blend modes are implemented using the following functions
- * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min value of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- *     l = LUM (C)
- *     min = Cmin
- *     max = Cmax
- *     if n < 0.0
- *         C = l + (((C – l) × l) ⁄ (l – min))
- *     if x > 1.0
- *         C = l + (((C – l) × (1 – l) ) ⁄ (max – l))
- *     return C
- *
- * set_lum (C, l):
- *     d = l – LUM (C)
- *     C += d
- *     return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- *     if Cmax > Cmin
- *         Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- *         Cmax = s
- *     else
- *         Cmid = Cmax = 0.0
- *         Cmin = 0.0
- *     return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- *     LUM (r × C) = r × LUM (C)	SAT (r * C) = r * SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- *     if x >= 1.0
- *
- * into
- *
- *     if x >= a
- *
- * then clip_color is also linear:
- *
- *     r * clip_color (C, a) = clip_color (r * C, r * a);
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- *       r * set_lum (C, l, a)
- *
- *     = r × clip_color (C + l - LUM (C), a)
- *
- *     = clip_color (r * C + r × l - r * LUM (C), r * a)
- *
- *     = set_lum (r * C, r * l, r * a)
- *
- * Finally, set_sat:
- *
- *       r * set_sat (C, s) = set_sat (x * C, r * s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- *       r * set_sat (C, s) = set_sat (r * C, r * s)
- *
- */
-typedef struct
-{
-    float	r;
-    float	g;
-    float	b;
-} rgb_t;
-
-static force_inline float
-minf (float a, float b)
-{
-    return a < b? a : b;
-}
-
-static force_inline float
-maxf (float a, float b)
-{
-    return a > b? a : b;
-}
-
-static force_inline float
-channel_min (const rgb_t *c)
-{
-    return minf (minf (c->r, c->g), c->b);
-}
-
-static force_inline float
-channel_max (const rgb_t *c)
-{
-    return maxf (maxf (c->r, c->g), c->b);
-}
-
-static force_inline float
-get_lum (const rgb_t *c)
-{
-    return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f;
-}
-
-static force_inline float
-get_sat (const rgb_t *c)
-{
-    return channel_max (c) - channel_min (c);
-}
-
-static void
-clip_color (rgb_t *color, float a)
-{
-    float l = get_lum (color);
-    float n = channel_min (color);
-    float x = channel_max (color);
-    float t;
-
-    if (n < 0.0f)
-    {
-	t = l - n;
-	if (FLOAT_IS_ZERO (t))
-	{
-	    color->r = 0.0f;
-	    color->g = 0.0f;
-	    color->b = 0.0f;
-	}
-	else
-	{
-	    color->r = l + (((color->r - l) * l) / t);
-	    color->g = l + (((color->g - l) * l) / t);
-	    color->b = l + (((color->b - l) * l) / t);
-	}
-    }
-    if (x > a)
-    {
-	t = x - l;
-	if (FLOAT_IS_ZERO (t))
-	{
-	    color->r = a;
-	    color->g = a;
-	    color->b = a;
-	}
-	else
-	{
-	    color->r = l + (((color->r - l) * (a - l) / t));
-	    color->g = l + (((color->g - l) * (a - l) / t));
-	    color->b = l + (((color->b - l) * (a - l) / t));
-	}
-    }
-}
-
-static void
-set_lum (rgb_t *color, float sa, float l)
-{
-    float d = l - get_lum (color);
-
-    color->r = color->r + d;
-    color->g = color->g + d;
-    color->b = color->b + d;
-
-    clip_color (color, sa);
-}
-
-static void
-set_sat (rgb_t *src, float sat)
-{
-    float *max, *mid, *min;
-    float t;
-
-    if (src->r > src->g)
-    {
-	if (src->r > src->b)
-	{
-	    max = &(src->r);
-
-	    if (src->g > src->b)
-	    {
-		mid = &(src->g);
-		min = &(src->b);
-	    }
-	    else
-	    {
-		mid = &(src->b);
-		min = &(src->g);
-	    }
-	}
-	else
-	{
-	    max = &(src->b);
-	    mid = &(src->r);
-	    min = &(src->g);
-	}
-    }
-    else
-    {
-	if (src->r > src->b)
-	{
-	    max = &(src->g);
-	    mid = &(src->r);
-	    min = &(src->b);
-	}
-	else
-	{
-	    min = &(src->r);
-
-	    if (src->g > src->b)
-	    {
-		max = &(src->g);
-		mid = &(src->b);
-	    }
-	    else
-	    {
-		max = &(src->b);
-		mid = &(src->g);
-	    }
-	}
-    }
-
-    t = *max - *min;
-
-    if (FLOAT_IS_ZERO (t))
-    {
-	*mid = *max = 0.0f;
-    }
-    else
-    {
-	*mid = ((*mid - *min) * sat) / t;
-	*max = sat;
-    }
-
-    *min = 0.0f;
-}
-
-/* Hue:
- *
- *       as * ad * B(s/as, d/as)
- *     = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1)
- *     = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad)
- *
- */
-static force_inline void
-blend_hsl_hue (rgb_t *res,
-	       const rgb_t *dest, float da,
-	       const rgb_t *src, float sa)
-{
-    res->r = src->r * da;
-    res->g = src->g * da;
-    res->b = src->b * da;
-
-    set_sat (res, get_sat (dest) * sa);
-    set_lum (res, sa * da, get_lum (dest) * sa);
-}
-
-/* 
- * Saturation
- *
- *     as * ad * B(s/as, d/ad)
- *   = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1)
- *   = set_lum (as * ad * set_sat (d/ad, SAT (s/as)),
- *                                       as * LUM (d), as * ad)
- *   = set_lum (set_sat (as * d, ad * SAT (s), as * LUM (d), as * ad))
- */
-static force_inline void
-blend_hsl_saturation (rgb_t *res,
-		      const rgb_t *dest, float da,
-		      const rgb_t *src, float sa)
-{
-    res->r = dest->r * sa;
-    res->g = dest->g * sa;
-    res->b = dest->b * sa;
-
-    set_sat (res, get_sat (src) * da);
-    set_lum (res, sa * da, get_lum (dest) * sa);
-}
-
-/* 
- * Color
- *
- *     as * ad * B(s/as, d/as)
- *   = as * ad * set_lum (s/as, LUM (d/ad), 1)
- *   = set_lum (s * ad, as * LUM (d), as * ad)
- */
-static force_inline void
-blend_hsl_color (rgb_t *res,
-		 const rgb_t *dest, float da,
-		 const rgb_t *src, float sa)
-{
-    res->r = src->r * da;
-    res->g = src->g * da;
-    res->b = src->b * da;
-
-    set_lum (res, sa * da, get_lum (dest) * sa);
-}
-
-/*
- * Luminosity
- *
- *     as * ad * B(s/as, d/ad)
- *   = as * ad * set_lum (d/ad, LUM (s/as), 1)
- *   = set_lum (as * d, ad * LUM (s), as * ad)
- */
-static force_inline void
-blend_hsl_luminosity (rgb_t *res,
-		      const rgb_t *dest, float da,
-		      const rgb_t *src, float sa)
-{
-    res->r = dest->r * sa;
-    res->g = dest->g * sa;
-    res->b = dest->b * sa;
-
-    set_lum (res, sa * da, get_lum (src) * da);
-}
-
-#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name)				\
-    static void								\
-    combine_ ## name ## _u_float (pixman_implementation_t *imp,		\
-				  pixman_op_t              op,		\
-				  float                   *dest,	\
-				  const float             *src,		\
-				  const float             *mask,	\
-				  int		           n_pixels)	\
-    {									\
-    	int i;								\
-									\
-	for (i = 0; i < 4 * n_pixels; i += 4)				\
-	{								\
-	    float sa, da;						\
-	    rgb_t sc, dc, rc;						\
-									\
-	    sa = src[i + 0];						\
-	    sc.r = src[i + 1];						\
-	    sc.g = src[i + 2];						\
-	    sc.b = src[i + 3];						\
-									\
-	    da = dest[i + 0];						\
-	    dc.r = dest[i + 1];						\
-	    dc.g = dest[i + 2];						\
-	    dc.b = dest[i + 3];						\
-									\
-	    if (mask)							\
-	    {								\
-		float ma = mask[i + 0];					\
-									\
-		/* Component alpha is not supported for HSL modes */	\
-		sa *= ma;						\
-		sc.r *= ma;						\
-		sc.g *= ma;						\
-		sc.g *= ma;						\
-	    }								\
-									\
-	    blend_ ## name (&rc, &dc, da, &sc, sa);			\
-									\
-	    dest[i + 0] = sa + da - sa * da;				\
-	    dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r;	\
-	    dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g;	\
-	    dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b;	\
-	}								\
-    }
-
-MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue)
-MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation)
-MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color)
-MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity)
-
-void
-_pixman_setup_combiner_functions_float (pixman_implementation_t *imp)
-{
-    /* Unified alpha */
-    imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float;
-    imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float;
-    imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float;
-    imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float;
-    imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float;
-    imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float;
-    imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float;
-    imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float;
-    imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float;
-    imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float;
-
-    /* Disjoint, unified */
-    imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float;
-
-    /* Conjoint, unified */
-    imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float;
-    imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float;
-
-    /* PDF operators, unified */
-    imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float;
-    imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float;
-    imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float;
-    imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float;
-    imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float;
-    imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float;
-    imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float;
-    imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float;
-    imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float;
-    imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float;
-    imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float;
-
-    imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float;
-    imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float;
-    imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float;
-    imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float;
-
-    /* Component alpha combiners */
-    imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float;
-
-    /* Disjoint CA */
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float;
-
-    /* Conjoint CA */
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float;
-
-    /* PDF operators CA */
-    imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float;
-    imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float;
-
-    /* It is not clear that these make sense, so make them noops for now */
-    imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float;
-    imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float;
-    imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float;
-    imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float;
-}
diff --git a/vendor/pixman/pixman/pixman-combine32.c b/vendor/pixman/pixman/pixman-combine32.c
deleted file mode 100644
index de51f64e1..000000000
--- a/vendor/pixman/pixman/pixman-combine32.c
+++ /dev/null
@@ -1,1189 +0,0 @@
-/*
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-
-/* component alpha helper functions */
-
-static void
-combine_mask_ca (uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *mask;
-
-    uint32_t x;
-    uint16_t xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == ~0)
-    {
-	x = x >> A_SHIFT;
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> A_SHIFT;
-    UN8x4_MUL_UN8x4 (x, a);
-    *(src) = x;
-    
-    UN8x4_MUL_UN8 (a, xa);
-    *(mask) = a;
-}
-
-static void
-combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
-{
-    uint32_t a = *mask;
-    uint32_t x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == ~0)
-	return;
-
-    x = *(src);
-    UN8x4_MUL_UN8x4 (x, a);
-    *(src) = x;
-}
-
-static void
-combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *(mask);
-    uint32_t x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> A_SHIFT;
-    if (x == MASK)
-	return;
-
-    if (a == ~0)
-    {
-	x |= x << G_SHIFT;
-	x |= x << R_SHIFT;
-	*(mask) = x;
-	return;
-    }
-
-    UN8x4_MUL_UN8 (a, x);
-    *(mask) = a;
-}
-
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions.  The unified alpha version has a 'u' at the end of the name,
- * the component version has a 'ca'.  Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-static force_inline uint32_t
-combine_mask (const uint32_t *src, const uint32_t *mask, int i)
-{
-    uint32_t s, m;
-
-    if (mask)
-    {
-	m = *(mask + i) >> A_SHIFT;
-
-	if (!m)
-	    return 0;
-    }
-
-    s = *(src + i);
-
-    if (mask)
-	UN8x4_MUL_UN8 (s, m);
-
-    return s;
-}
-
-static void
-combine_clear (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    memset (dest, 0, width * sizeof (uint32_t));
-}
-
-static void
-combine_dst (pixman_implementation_t *imp,
-	     pixman_op_t	      op,
-	     uint32_t *		      dest,
-	     const uint32_t *	      src,
-	     const uint32_t *         mask,
-	     int		      width)
-{
-    return;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    if (!mask)
-    {
-	memcpy (dest, src, width * sizeof (uint32_t));
-    }
-    else
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t s = combine_mask (src, mask, i);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-static void
-combine_over_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *               dest,
-                const uint32_t *         src,
-                const uint32_t *         mask,
-                int                      width)
-{
-    int i;
-
-    if (!mask)
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t s = *(src + i);
-	    uint32_t a = ALPHA_8 (s);
-	    if (a == 0xFF)
-	    {
-		*(dest + i) = s;
-	    }
-	    else if (s)
-	    {
-		uint32_t d = *(dest + i);
-		uint32_t ia = a ^ 0xFF;
-		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		*(dest + i) = d;
-	    }
-	}
-    }
-    else
-    {
-	for (i = 0; i < width; ++i)
-	{
-	    uint32_t m = ALPHA_8 (*(mask + i));
-	    if (m == 0xFF)
-	    {
-		uint32_t s = *(src + i);
-		uint32_t a = ALPHA_8 (s);
-		if (a == 0xFF)
-		{
-		    *(dest + i) = s;
-		}
-		else if (s)
-		{
-		    uint32_t d = *(dest + i);
-		    uint32_t ia = a ^ 0xFF;
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		    *(dest + i) = d;
-		}
-	    }
-	    else if (m)
-	    {
-		uint32_t s = *(src + i);
-		if (s)
-		{
-		    uint32_t d = *(dest + i);
-		    UN8x4_MUL_UN8 (s, m);
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
-		    *(dest + i) = d;
-		}
-	    }
-	}
-    }
-}
-
-static void
-combine_over_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t ia = ALPHA_8 (~*(dest + i));
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_u (pixman_implementation_t *imp,
-              pixman_op_t              op,
-              uint32_t *               dest,
-              const uint32_t *         src,
-              const uint32_t *         mask,
-              int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t a = ALPHA_8 (*(dest + i));
-	UN8x4_MUL_UN8 (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_reverse_u (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               dest,
-                      const uint32_t *         src,
-                      const uint32_t *         mask,
-                      int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t a = ALPHA_8 (s);
-	UN8x4_MUL_UN8 (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_out_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t a = ALPHA_8 (~*(dest + i));
-	UN8x4_MUL_UN8 (s, a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_out_reverse_u (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *               dest,
-                       const uint32_t *         src,
-                       const uint32_t *         mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t a = ALPHA_8 (~s);
-	UN8x4_MUL_UN8 (d, a);
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_atop_u (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *               dest,
-                const uint32_t *         src,
-                const uint32_t *         mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_atop_reverse_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t src_a = ALPHA_8 (s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_xor_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_add_u (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *               dest,
-               const uint32_t *         src,
-               const uint32_t *         mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	UN8x4_ADD_UN8x4 (d, s);
-	*(dest + i) = d;
-    }
-}
-
-/*
- * PDF blend modes:
- *
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- *
- *     http://www.adobe.com/devnet/pdf/pdf_reference.html
- *
- * The specific documents of interest are the PDF spec itself:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
- *
- * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
- * 9.1 and Reader 9.1:
- *
- *     http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
- *
- * that clarifies the specifications for blend modes ColorDodge and
- * ColorBurn.
- *
- * The formula for computing the final pixel color given in 11.3.6 is:
- *
- *     αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- *
- * with B() is the blend function. When B(Cb, Cs) = Cs, this formula
- * reduces to the regular OVER operator.
- *
- * Cs and Cb are not premultiplied, so in our implementation we instead
- * use:
- *
- *     cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
- *
- * where cr, cs, and cb are premultiplied colors, and where the
- *
- *     αb × αs × B(cb/αb, cs/αs)
- *
- * part is first arithmetically simplified under the assumption that αb
- * and αs are not 0, and then updated to produce a meaningful result when
- * they are.
- *
- * For all the blend mode operators, the alpha channel is given by
- *
- *     αr = αs + αb + αb × αs
- */
-
-/*
- * Multiply
- *
- *      ad * as * B(d / ad, s / as)
- *    = ad * as * d/ad * s/as
- *    = d * s
- *
- */
-static void
-combine_multiply_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t ss = s;
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia);
-	UN8x4_MUL_UN8x4 (d, s);
-	UN8x4_ADD_UN8x4 (d, ss);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_multiply_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t m = *(mask + i);
-	uint32_t s = *(src + i);
-	uint32_t d = *(dest + i);
-	uint32_t r = d;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	combine_mask_ca (&s, &m);
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
-	UN8x4_MUL_UN8x4 (d, s);
-	UN8x4_ADD_UN8x4 (r, d);
-
-	*(dest + i) = r;
-    }
-}
-
-#define CLAMP(v, low, high)						\
-    do									\
-    {									\
-	if (v < (low))							\
-	    v = (low);							\
-	if (v > (high))							\
-	    v = (high);							\
-    } while (0)
-
-#define PDF_SEPARABLE_BLEND_MODE(name)					\
-    static void								\
-    combine_ ## name ## _u (pixman_implementation_t *imp,		\
-			    pixman_op_t              op,		\
-                            uint32_t *               dest,		\
-			    const uint32_t *         src,		\
-			    const uint32_t *         mask,		\
-			    int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    uint32_t s = combine_mask (src, mask, i);			\
-	    uint32_t d = *(dest + i);					\
-	    uint8_t sa = ALPHA_8 (s);					\
-	    uint8_t isa = ~sa;						\
-	    uint8_t da = ALPHA_8 (d);					\
-	    uint8_t ida = ~da;						\
-	    uint32_t ra, rr, rg, rb;					\
-	    								\
-	    ra = da * 0xff + sa * 0xff - sa * da;			\
-	    rr = isa * RED_8 (d) + ida * RED_8 (s);			\
-	    rg = isa * GREEN_8 (d) + ida * GREEN_8 (s);			\
-	    rb = isa * BLUE_8 (d) + ida * BLUE_8 (s);			\
-									\
-	    rr += blend_ ## name (RED_8 (d), da, RED_8 (s), sa);	\
-	    rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa);    \
-	    rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa);	\
-                                                                        \
-	    CLAMP (ra, 0, 255 * 255);				        \
-	    CLAMP (rr, 0, 255 * 255);				        \
-	    CLAMP (rg, 0, 255 * 255);				        \
-	    CLAMP (rb, 0, 255 * 255);				        \
-									\
-	    ra = DIV_ONE_UN8 (ra);					\
-	    rr = DIV_ONE_UN8 (rr);					\
-	    rg = DIV_ONE_UN8 (rg);					\
-	    rb = DIV_ONE_UN8 (rb);					\
-									\
-	    *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb;		\
-	}								\
-    }									\
-    									\
-    static void								\
-    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
-			     pixman_op_t              op,		\
-                             uint32_t *               dest,		\
-			     const uint32_t *         src,		\
-			     const uint32_t *         mask,		\
-			     int                      width)		\
-    {									\
-	int i;								\
-	for (i = 0; i < width; ++i)					\
-	{								\
-	    uint32_t m = *(mask + i);					\
-	    uint32_t s = *(src + i);					\
-	    uint32_t d = *(dest + i);					\
-	    uint8_t da = ALPHA_8 (d);					\
-	    uint8_t ida = ~da;						\
-	    uint32_t ra, rr, rg, rb;					\
-	    uint8_t ira, iga, iba;					\
-	    								\
-	    combine_mask_ca (&s, &m);					\
-	    								\
-	    ira = ~RED_8 (m);						\
-	    iga = ~GREEN_8 (m);						\
-	    iba = ~BLUE_8 (m);						\
-									\
-	    ra = da * 0xff + ALPHA_8 (s) * 0xff - ALPHA_8 (s) * da;	\
-	    rr = ira * RED_8 (d) + ida * RED_8 (s);			\
-	    rg = iga * GREEN_8 (d) + ida * GREEN_8 (s);			\
-	    rb = iba * BLUE_8 (d) + ida * BLUE_8 (s);			\
-									\
-	    rr += blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m));	\
-	    rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \
-	    rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \
-									\
-	    CLAMP (ra, 0, 255 * 255);				        \
-	    CLAMP (rr, 0, 255 * 255);				        \
-	    CLAMP (rg, 0, 255 * 255);				        \
-	    CLAMP (rb, 0, 255 * 255);				        \
-									\
-	    ra = DIV_ONE_UN8 (ra);					\
-	    rr = DIV_ONE_UN8 (rr);					\
-	    rg = DIV_ONE_UN8 (rg);					\
-	    rb = DIV_ONE_UN8 (rb);					\
-									\
-	    *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb;		\
-	}								\
-    }
-
-/*
- * Screen
- *
- *      ad * as * B(d/ad, s/as)
- *    = ad * as * (d/ad + s/as - s/as * d/ad)
- *    = ad * s + as * d - s * d
- */
-static inline int32_t
-blend_screen (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    return s * ad + d * as - s * d;
-}
-
-PDF_SEPARABLE_BLEND_MODE (screen)
-
-/*
- * Overlay
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * Hardlight (s, d)
- *   = if (d / ad < 0.5)
- *         as * ad * Multiply (s/as, 2 * d/ad)
- *     else
- *         as * ad * Screen (s/as, 2 * d / ad - 1)
- *   = if (d < 0.5 * ad)
- *         as * ad * s/as * 2 * d /ad
- *     else
- *         as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
- *   = if (2 * d < ad)
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static inline int32_t
-blend_overlay (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    uint32_t r;
-
-    if (2 * d < ad)
-	r = 2 * s * d;
-    else
-	r = as * ad - 2 * (ad - d) * (as - s);
-
-    return r;
-}
-
-PDF_SEPARABLE_BLEND_MODE (overlay)
-
-/*
- * Darken
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MIN(d/ad, s/as)
- *   = MIN (as * d, ad * s)
- */
-static inline int32_t
-blend_darken (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    s = ad * s;
-    d = as * d;
-
-    return s > d ? d : s;
-}
-
-PDF_SEPARABLE_BLEND_MODE (darken)
-
-/*
- * Lighten
- *
- *     ad * as * B(d/ad, s/as)
- *   = ad * as * MAX(d/ad, s/as)
- *   = MAX (as * d, ad * s)
- */
-static inline int32_t
-blend_lighten (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    s = ad * s;
-    d = as * d;
-    
-    return s > d ? s : d;
-}
-
-PDF_SEPARABLE_BLEND_MODE (lighten)
-
-/*
- * Hard light
- *
- *     ad * as * B(d/ad, s/as)
- *   = if (s/as <= 0.5)
- *         ad * as * Multiply (d/ad, 2 * s/as)
- *     else
- *         ad * as * Screen (d/ad, 2 * s/as - 1)
- *   = if 2 * s <= as
- *         ad * as * d/ad * 2 * s / as
- *     else
- *         ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1))
- *   = if 2 * s <= as
- *         2 * s * d
- *     else
- *         as * ad - 2 * (ad - d) * (as - s)
- */
-static inline int32_t
-blend_hard_light (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    if (2 * s < as)
-	return 2 * s * d;
-    else
-	return as * ad - 2 * (ad - d) * (as - s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (hard_light)
-
-/*
- * Difference
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * abs (s/as - d/ad)
- *   = if (s/as <= d/ad)
- *         ad * as * (d/ad - s/as)
- *     else
- *         ad * as * (s/as - d/ad)
- *   = if (ad * s <= as * d)
- *        as * d - ad * s
- *     else
- *        ad * s - as * d
- */
-static inline int32_t
-blend_difference (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    int32_t das = d * as;
-    int32_t sad = s * ad;
-
-    if (sad < das)
-	return das - sad;
-    else
-	return sad - das;
-}
-
-PDF_SEPARABLE_BLEND_MODE (difference)
-
-/*
- * Exclusion
- *
- *     ad * as * B(s/as, d/ad)
- *   = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
- *   = as * d + ad * s - 2 * s * d
- */
-
-/* This can be made faster by writing it directly and not using
- * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-
-static inline int32_t
-blend_exclusion (int32_t d, int32_t ad, int32_t s, int32_t as)
-{
-    return s * ad + d * as - 2 * d * s;
-}
-
-PDF_SEPARABLE_BLEND_MODE (exclusion)
-
-#undef PDF_SEPARABLE_BLEND_MODE
-
-/* Component alpha combiners */
-
-static void
-combine_clear_ca (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  uint32_t *                dest,
-                  const uint32_t *          src,
-                  const uint32_t *          mask,
-                  int                      width)
-{
-    memset (dest, 0, width * sizeof(uint32_t));
-}
-
-static void
-combine_src_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_over_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 uint32_t *                dest,
-                 const uint32_t *          src,
-                 const uint32_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_ca (&s, &m);
-
-	a = ~m;
-	if (a)
-	{
-	    uint32_t d = *(dest + i);
-	    UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s);
-	    s = d;
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_over_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t a = ~d >> A_SHIFT;
-
-	if (a)
-	{
-	    uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
-
-	    UN8x4_MUL_UN8x4 (s, m);
-	    UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d);
-
-	    *(dest + i) = s;
-	}
-    }
-}
-
-static void
-combine_in_ca (pixman_implementation_t *imp,
-               pixman_op_t              op,
-               uint32_t *                dest,
-               const uint32_t *          src,
-               const uint32_t *          mask,
-               int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint16_t a = d >> A_SHIFT;
-	uint32_t s = 0;
-
-	if (a)
-	{
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UN8x4_MUL_UN8 (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_in_reverse_ca (pixman_implementation_t *imp,
-                       pixman_op_t              op,
-                       uint32_t *                dest,
-                       const uint32_t *          src,
-                       const uint32_t *          mask,
-                       int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = m;
-	if (a != ~0)
-	{
-	    uint32_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UN8x4_MUL_UN8x4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-static void
-combine_out_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint16_t a = ~d >> A_SHIFT;
-	uint32_t s = 0;
-
-	if (a)
-	{
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    combine_mask_value_ca (&s, &m);
-
-	    if (a != MASK)
-		UN8x4_MUL_UN8 (s, a);
-	}
-
-	*(dest + i) = s;
-    }
-}
-
-static void
-combine_out_reverse_ca (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *                dest,
-                        const uint32_t *          src,
-                        const uint32_t *          mask,
-                        int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	combine_mask_alpha_ca (&s, &m);
-
-	a = ~m;
-	if (a != ~0)
-	{
-	    uint32_t d = 0;
-
-	    if (a)
-	    {
-		d = *(dest + i);
-		UN8x4_MUL_UN8x4 (d, a);
-	    }
-
-	    *(dest + i) = d;
-	}
-    }
-}
-
-static void
-combine_atop_ca (pixman_implementation_t *imp,
-                 pixman_op_t              op,
-                 uint32_t *                dest,
-                 const uint32_t *          src,
-                 const uint32_t *          mask,
-                 int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_atop_reverse_ca (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *                dest,
-                         const uint32_t *          src,
-                         const uint32_t *          mask,
-                         int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_xor_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t d = *(dest + i);
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t ad;
-	uint16_t as = ~d >> A_SHIFT;
-
-	combine_mask_ca (&s, &m);
-
-	ad = ~m;
-
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
-
-	*(dest + i) = d;
-    }
-}
-
-static void
-combine_add_ca (pixman_implementation_t *imp,
-                pixman_op_t              op,
-                uint32_t *                dest,
-                const uint32_t *          src,
-                const uint32_t *          mask,
-                int                      width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t d = *(dest + i);
-
-	combine_mask_value_ca (&s, &m);
-
-	UN8x4_ADD_UN8x4 (d, s);
-
-	*(dest + i) = d;
-    }
-}
-
-void
-_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
-{
-    /* Unified alpha */
-    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
-    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
-    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
-    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
-    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
-
-    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
-    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
-    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
-    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
-    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
-    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
-    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
-    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
-
-    /* Component alpha combiners */
-    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
-    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
-    /* dest */
-    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
-
-    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
-    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
-    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
-    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
-    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
-    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
-    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-}
diff --git a/vendor/pixman/pixman/pixman-combine32.h b/vendor/pixman/pixman/pixman-combine32.h
deleted file mode 100644
index 59bb2477a..000000000
--- a/vendor/pixman/pixman/pixman-combine32.h
+++ /dev/null
@@ -1,272 +0,0 @@
-#define COMPONENT_SIZE 8
-#define MASK 0xff
-#define ONE_HALF 0x80
-
-#define A_SHIFT 8 * 3
-#define R_SHIFT 8 * 2
-#define G_SHIFT 8
-#define A_MASK 0xff000000
-#define R_MASK 0xff0000
-#define G_MASK 0xff00
-
-#define RB_MASK 0xff00ff
-#define AG_MASK 0xff00ff00
-#define RB_ONE_HALF 0x800080
-#define RB_MASK_PLUS_ONE 0x1000100
-
-#define ALPHA_8(x) ((x) >> A_SHIFT)
-#define RED_8(x) (((x) >> R_SHIFT) & MASK)
-#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
-#define BLUE_8(x) ((x) & MASK)
-
-/*
- * ARMv6 has UQADD8 instruction, which implements unsigned saturated
- * addition for 8-bit values packed in 32-bit registers. It is very useful
- * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
- * otherwise need a lot of arithmetic operations to simulate this operation).
- * Since most of the major ARM linux distros are built for ARMv7, we are
- * much less dependent on runtime CPU detection and can get practical
- * benefits from conditional compilation here for a lot of users.
- */
-
-#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
-    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
-#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
-    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
-    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
-    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
-    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
-    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)
-
-static force_inline uint32_t
-un8x4_add_un8x4 (uint32_t x, uint32_t y)
-{
-    uint32_t t;
-    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
-    return t;
-}
-
-#define UN8x4_ADD_UN8x4(x, y) \
-    ((x) = un8x4_add_un8x4 ((x), (y)))
-
-#define UN8_rb_ADD_UN8_rb(x, y, t) \
-    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))
-
-#define ADD_UN8(x, y, t) \
-    ((t) = (x), un8x4_add_un8x4 ((t), (y)))
-
-#endif
-#endif
-
-/*****************************************************************************/
-
-/*
- * Helper macros.
- */
-
-#define MUL_UN8(a, b, t)						\
-    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
-
-#define DIV_UN8(a, b)							\
-    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))
-
-#ifndef ADD_UN8
-#define ADD_UN8(x, y, t)				     \
-    ((t) = (x) + (y),					     \
-     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
-#endif
-
-#define DIV_ONE_UN8(x)							\
-    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
-
-/*
- * The methods below use some tricks to be able to do two color
- * components at the same time.
- */
-
-/*
- * x_rb = (x_rb * a) / 255
- */
-#define UN8_rb_MUL_UN8(x, a, t)						\
-    do									\
-    {									\
-	t  = ((x) & RB_MASK) * (a);					\
-	t += RB_ONE_HALF;						\
-	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
-	x &= RB_MASK;							\
-    } while (0)
-
-/*
- * x_rb = min (x_rb + y_rb, 255)
- */
-#ifndef UN8_rb_ADD_UN8_rb
-#define UN8_rb_ADD_UN8_rb(x, y, t)					\
-    do									\
-    {									\
-	t = ((x) + (y));						\
-	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
-	x = (t & RB_MASK);						\
-    } while (0)
-#endif
-
-/*
- * x_rb = (x_rb * a_rb) / 255
- */
-#define UN8_rb_MUL_UN8_rb(x, a, t)					\
-    do									\
-    {									\
-	t  = (x & MASK) * (a & MASK);					\
-	t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);			\
-	t += RB_ONE_HALF;						\
-	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
-	x = t & RB_MASK;						\
-    } while (0)
-
-/*
- * x_c = (x_c * a) / 255
- */
-#define UN8x4_MUL_UN8(x, a)						\
-    do									\
-    {									\
-	uint32_t r1__, r2__, t__;					\
-									\
-	r1__ = (x);							\
-	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a) / 255 + y_c
- */
-#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y)				\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (y) & RB_MASK;						\
-	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
-	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
-	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a + y_c * b) / 255
- */
-#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (y);							\
-	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
-	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
-	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT);					\
-	r3__ = ((y) >> G_SHIFT);					\
-	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
-	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
-	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255
- */
-#define UN8x4_MUL_UN8x4(x, a)						\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = (a) >> G_SHIFT;						\
-	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255 + y_c
- */
-#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y)				\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
-	r2__ = (y) & RB_MASK;						\
-	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT);					\
-	r3__ = ((a) >> G_SHIFT);					\
-	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
-									\
-	(x) = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
- * x_c = (x_c * a_c + y_c * b) / 255
- */
-#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x);							\
-	r2__ = (a);							\
-	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
-	r2__ = (y);							\
-	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
-	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = (x) >> G_SHIFT;						\
-	r3__ = (a) >> G_SHIFT;						\
-	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
-	r3__ = (y) >> G_SHIFT;						\
-	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
-	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
-									\
-	x = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-
-/*
-  x_c = min(x_c + y_c, 255)
-*/
-#ifndef UN8x4_ADD_UN8x4
-#define UN8x4_ADD_UN8x4(x, y)						\
-    do									\
-    {									\
-	uint32_t r1__, r2__, r3__, t__;					\
-									\
-	r1__ = (x) & RB_MASK;						\
-	r2__ = (y) & RB_MASK;						\
-	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
-									\
-	r2__ = ((x) >> G_SHIFT) & RB_MASK;				\
-	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
-	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
-									\
-	x = r1__ | (r2__ << G_SHIFT);					\
-    } while (0)
-#endif
diff --git a/vendor/pixman/pixman/pixman-compiler.h b/vendor/pixman/pixman/pixman-compiler.h
deleted file mode 100644
index 639415693..000000000
--- a/vendor/pixman/pixman/pixman-compiler.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* Pixman uses some non-standard compiler features. This file ensures
- * they exist
- *
- * The features are:
- *
- *    FUNC	     must be defined to expand to the current function
- *    PIXMAN_EXPORT  should be defined to whatever is required to
- *                   export functions from a shared library
- *    limits	     limits for various types must be defined
- *    inline         must be defined
- *    force_inline   must be defined
- */
-#if defined (__GNUC__)
-#  define FUNC     ((const char*) (__PRETTY_FUNCTION__))
-#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
-#  define FUNC     ((const char*) (__func__))
-#else
-#  define FUNC     ((const char*) ("???"))
-#endif
-
-#if defined (__GNUC__)
-#  define unlikely(expr) __builtin_expect ((expr), 0)
-#else
-#  define unlikely(expr)  (expr)
-#endif
-
-#if defined (__GNUC__)
-#  define MAYBE_UNUSED  __attribute__((unused))
-#else
-#  define MAYBE_UNUSED
-#endif
-
-#ifndef INT16_MIN
-# define INT16_MIN              (-32767-1)
-#endif
-
-#ifndef INT16_MAX
-# define INT16_MAX              (32767)
-#endif
-
-#ifndef INT32_MIN
-# define INT32_MIN              (-2147483647-1)
-#endif
-
-#ifndef INT32_MAX
-# define INT32_MAX              (2147483647)
-#endif
-
-#ifndef UINT32_MIN
-# define UINT32_MIN             (0)
-#endif
-
-#ifndef UINT32_MAX
-# define UINT32_MAX             (4294967295U)
-#endif
-
-#ifndef INT64_MIN
-# define INT64_MIN              (-9223372036854775807-1)
-#endif
-
-#ifndef INT64_MAX
-# define INT64_MAX              (9223372036854775807)
-#endif
-
-#ifndef SIZE_MAX
-# define SIZE_MAX               ((size_t)-1)
-#endif
-
-
-#ifndef M_PI
-# define M_PI			3.14159265358979323846
-#endif
-
-#ifdef _MSC_VER
-/* 'inline' is available only in C++ in MSVC */
-#   define inline __inline
-#   define force_inline __forceinline
-#   define noinline __declspec(noinline)
-#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
-#   define inline __inline__
-#   define force_inline __inline__ __attribute__ ((__always_inline__))
-#   define noinline __attribute__((noinline))
-#else
-#   ifndef force_inline
-#      define force_inline inline
-#   endif
-#   ifndef noinline
-#      define noinline
-#   endif
-#endif
-
-/* GCC visibility */
-#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32)
-#   define PIXMAN_EXPORT __attribute__ ((visibility("default")))
-/* Sun Studio 8 visibility */
-#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
-#   define PIXMAN_EXPORT __global
-#elif defined (_MSC_VER) || defined(__MINGW32__)
-#   define PIXMAN_EXPORT PIXMAN_API
-#else
-#   define PIXMAN_EXPORT
-#endif
-
-/* member offsets */
-#define CONTAINER_OF(type, member, data)				\
-    ((type *)(((uint8_t *)data) - offsetof (type, member)))
-
-/* TLS */
-#if defined(PIXMAN_NO_TLS)
-
-#   define PIXMAN_DEFINE_THREAD_LOCAL(type, name)			\
-    static type name;
-#   define PIXMAN_GET_THREAD_LOCAL(name)				\
-    (&name)
-
-#elif defined(TLS)
-
-#   define PIXMAN_DEFINE_THREAD_LOCAL(type, name)			\
-    static TLS type name;
-#   define PIXMAN_GET_THREAD_LOCAL(name)				\
-    (&name)
-
-#elif defined(__MINGW32__)
-
-#   define _NO_W32_PSEUDO_MODIFIERS
-#   include <windows.h>
-
-#   define PIXMAN_DEFINE_THREAD_LOCAL(type, name)			\
-    static volatile int tls_ ## name ## _initialized = 0;		\
-    static void *tls_ ## name ## _mutex = NULL;				\
-    static unsigned tls_ ## name ## _index;				\
-									\
-    static type *							\
-    tls_ ## name ## _alloc (void)					\
-    {									\
-        type *value = calloc (1, sizeof (type));			\
-        if (value)							\
-            TlsSetValue (tls_ ## name ## _index, value);		\
-        return value;							\
-    }									\
-									\
-    static force_inline type *						\
-    tls_ ## name ## _get (void)						\
-    {									\
-	type *value;							\
-	if (!tls_ ## name ## _initialized)				\
-	{								\
-	    if (!tls_ ## name ## _mutex)				\
-	    {								\
-		void *mutex = CreateMutexA (NULL, 0, NULL);		\
-		if (InterlockedCompareExchangePointer (			\
-			&tls_ ## name ## _mutex, mutex, NULL) != NULL)	\
-		{							\
-		    CloseHandle (mutex);				\
-		}							\
-	    }								\
-	    WaitForSingleObject (tls_ ## name ## _mutex, 0xFFFFFFFF);	\
-	    if (!tls_ ## name ## _initialized)				\
-	    {								\
-		tls_ ## name ## _index = TlsAlloc ();			\
-		tls_ ## name ## _initialized = 1;			\
-	    }								\
-	    ReleaseMutex (tls_ ## name ## _mutex);			\
-	}								\
-	if (tls_ ## name ## _index == 0xFFFFFFFF)			\
-	    return NULL;						\
-	value = TlsGetValue (tls_ ## name ## _index);			\
-	if (!value)							\
-	    value = tls_ ## name ## _alloc ();				\
-	return value;							\
-    }
-
-#   define PIXMAN_GET_THREAD_LOCAL(name)				\
-    tls_ ## name ## _get ()
-
-#elif defined(_MSC_VER)
-
-#   define PIXMAN_DEFINE_THREAD_LOCAL(type, name)			\
-    static __declspec(thread) type name;
-#   define PIXMAN_GET_THREAD_LOCAL(name)				\
-    (&name)
-
-#elif defined(HAVE_PTHREADS)
-
-#include <pthread.h>
-
-#  define PIXMAN_DEFINE_THREAD_LOCAL(type, name)			\
-    static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \
-    static pthread_key_t tls_ ## name ## _key;				\
-									\
-    static void								\
-    tls_ ## name ## _destroy_value (void *value)			\
-    {									\
-	free (value);							\
-    }									\
-									\
-    static void								\
-    tls_ ## name ## _make_key (void)					\
-    {									\
-	pthread_key_create (&tls_ ## name ## _key,			\
-			    tls_ ## name ## _destroy_value);		\
-    }									\
-									\
-    static type *							\
-    tls_ ## name ## _alloc (void)					\
-    {									\
-	type *value = calloc (1, sizeof (type));			\
-	if (value)							\
-	    pthread_setspecific (tls_ ## name ## _key, value);		\
-	return value;							\
-    }									\
-									\
-    static force_inline type *						\
-    tls_ ## name ## _get (void)						\
-    {									\
-	type *value = NULL;						\
-	if (pthread_once (&tls_ ## name ## _once_control,		\
-			  tls_ ## name ## _make_key) == 0)		\
-	{								\
-	    value = pthread_getspecific (tls_ ## name ## _key);		\
-	    if (!value)							\
-		value = tls_ ## name ## _alloc ();			\
-	}								\
-	return value;							\
-    }
-
-#   define PIXMAN_GET_THREAD_LOCAL(name)				\
-    tls_ ## name ## _get ()
-
-#else
-
-#    error "Unknown thread local support for this system. Pixman will not work with multiple threads. Define PIXMAN_NO_TLS to acknowledge and accept this limitation and compile pixman without thread-safety support."
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-conical-gradient.c b/vendor/pixman/pixman/pixman-conical-gradient.c
deleted file mode 100644
index 37dfffd73..000000000
--- a/vendor/pixman/pixman/pixman-conical-gradient.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdlib.h>
-#include <math.h>
-#include "pixman-private.h"
-
-static force_inline double
-coordinates_to_parameter (double x, double y, double angle)
-{
-    double t;
-
-    t = atan2 (y, x) + angle;
-
-    while (t < 0)
-	t += 2 * M_PI;
-
-    while (t >= 2 * M_PI)
-	t -= 2 * M_PI;
-
-    return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and
-				      * make rotation CCW
-				      */
-}
-
-static uint32_t *
-conical_get_scanline (pixman_iter_t                 *iter,
-		      const uint32_t                *mask,
-		      int                            Bpp,
-		      pixman_gradient_walker_write_t write_pixel)
-{
-    pixman_image_t *image = iter->image;
-    int x = iter->x;
-    int y = iter->y;
-    int width = iter->width;
-    uint32_t *buffer = iter->buffer;
-
-    gradient_t *gradient = (gradient_t *)image;
-    conical_gradient_t *conical = (conical_gradient_t *)image;
-    uint32_t       *end = buffer + width * (Bpp / 4);
-    pixman_gradient_walker_t walker;
-    pixman_bool_t affine = TRUE;
-    double cx = 1.;
-    double cy = 0.;
-    double cz = 0.;
-    double rx = x + 0.5;
-    double ry = y + 0.5;
-    double rz = 1.;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    if (image->common.transform)
-    {
-	pixman_vector_t v;
-
-	/* reference point is the center of the pixel */
-	v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-	v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-	v.vector[2] = pixman_fixed_1;
-
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return iter->buffer;
-
-	cx = image->common.transform->matrix[0][0] / 65536.;
-	cy = image->common.transform->matrix[1][0] / 65536.;
-	cz = image->common.transform->matrix[2][0] / 65536.;
-
-	rx = v.vector[0] / 65536.;
-	ry = v.vector[1] / 65536.;
-	rz = v.vector[2] / 65536.;
-
-	affine =
-	    image->common.transform->matrix[2][0] == 0 &&
-	    v.vector[2] == pixman_fixed_1;
-    }
-
-    if (affine)
-    {
-	rx -= conical->center.x / 65536.;
-	ry -= conical->center.y / 65536.;
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		double t = coordinates_to_parameter (rx, ry, conical->angle);
-
-		write_pixel (&walker,
-			     (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
-			     buffer);
-	    }
-
-	    buffer += (Bpp / 4);
-
-	    rx += cx;
-	    ry += cy;
-	}
-    }
-    else
-    {
-	while (buffer < end)
-	{
-	    double x, y;
-
-	    if (!mask || *mask++)
-	    {
-		double t;
-
-		if (rz != 0)
-		{
-		    x = rx / rz;
-		    y = ry / rz;
-		}
-		else
-		{
-		    x = y = 0.;
-		}
-
-		x -= conical->center.x / 65536.;
-		y -= conical->center.y / 65536.;
-
-		t = coordinates_to_parameter (x, y, conical->angle);
-
-		write_pixel (&walker,
-			     (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
-			     buffer);
-	    }
-
-	    buffer += (Bpp / 4);
-
-	    rx += cx;
-	    ry += cy;
-	    rz += cz;
-	}
-    }
-
-    iter->y++;
-    return iter->buffer;
-}
-
-static uint32_t *
-conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return conical_get_scanline (iter, mask, 4,
-				 _pixman_gradient_walker_write_narrow);
-}
-
-static uint32_t *
-conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return conical_get_scanline (iter, NULL, 16,
-				 _pixman_gradient_walker_write_wide);
-}
-
-void
-_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
-{
-    if (iter->iter_flags & ITER_NARROW)
-	iter->get_scanline = conical_get_scanline_narrow;
-    else
-	iter->get_scanline = conical_get_scanline_wide;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_conical_gradient (const pixman_point_fixed_t *  center,
-                                      pixman_fixed_t                angle,
-                                      const pixman_gradient_stop_t *stops,
-                                      int                           n_stops)
-{
-    pixman_image_t *image = _pixman_image_allocate ();
-    conical_gradient_t *conical;
-
-    if (!image)
-	return NULL;
-
-    conical = &image->conical;
-
-    if (!_pixman_init_gradient (&conical->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    angle = MOD (angle, pixman_int_to_fixed (360));
-
-    image->type = CONICAL;
-
-    conical->center = *center;
-    conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI;
-
-    return image;
-}
-
diff --git a/vendor/pixman/pixman/pixman-edge-accessors.c b/vendor/pixman/pixman/pixman-edge-accessors.c
deleted file mode 100644
index ea3a31e2f..000000000
--- a/vendor/pixman/pixman/pixman-edge-accessors.c
+++ /dev/null
@@ -1,4 +0,0 @@
-
-#define PIXMAN_FB_ACCESSORS
-
-#include "pixman-edge.c"
diff --git a/vendor/pixman/pixman/pixman-edge-imp.h b/vendor/pixman/pixman/pixman-edge-imp.h
deleted file mode 100644
index a4698eddb..000000000
--- a/vendor/pixman/pixman/pixman-edge-imp.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright © 2004 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifndef rasterize_span
-#endif
-
-static void
-RASTERIZE_EDGES (pixman_image_t  *image,
-		pixman_edge_t	*l,
-		pixman_edge_t	*r,
-		pixman_fixed_t		t,
-		pixman_fixed_t		b)
-{
-    pixman_fixed_t  y = t;
-    uint32_t  *line;
-    uint32_t *buf = (image)->bits.bits;
-    int stride = (image)->bits.rowstride;
-    int width = (image)->bits.width;
-
-    line = buf + pixman_fixed_to_int (y) * stride;
-
-    for (;;)
-    {
-	pixman_fixed_t	lx;
-	pixman_fixed_t      rx;
-	int	lxi;
-	int rxi;
-
-	lx = l->x;
-	rx = r->x;
-#if N_BITS == 1
-	/* For the non-antialiased case, round the coordinates up, in effect
-	 * sampling just slightly to the left of the pixel. This is so that
-	 * when the sample point lies exactly on the line, we round towards
-	 * north-west.
-	 *
-	 * (The AA case does a similar  adjustment in RENDER_SAMPLES_X)
-	 */
-	lx += X_FRAC_FIRST(1) - pixman_fixed_e;
-	rx += X_FRAC_FIRST(1) - pixman_fixed_e;
-#endif
-	/* clip X */
-	if (lx < 0)
-	    lx = 0;
-	if (pixman_fixed_to_int (rx) >= width)
-#if N_BITS == 1
-	    rx = pixman_int_to_fixed (width);
-#else
-	    /* Use the last pixel of the scanline, covered 100%.
-	     * We can't use the first pixel following the scanline,
-	     * because accessing it could result in a buffer overrun.
-	     */
-	    rx = pixman_int_to_fixed (width) - 1;
-#endif
-
-	/* Skip empty (or backwards) sections */
-	if (rx > lx)
-	{
-
-	    /* Find pixel bounds for span */
-	    lxi = pixman_fixed_to_int (lx);
-	    rxi = pixman_fixed_to_int (rx);
-
-#if N_BITS == 1
-	    {
-
-#define LEFT_MASK(x)							\
-		(((x) & 0x1f) ?						\
-		 SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0)
-#define RIGHT_MASK(x)							\
-		(((32 - (x)) & 0x1f) ?					\
-		 SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0)
-		
-#define MASK_BITS(x,w,l,n,r) {						\
-		    n = (w);						\
-		    r = RIGHT_MASK ((x) + n);				\
-		    l = LEFT_MASK (x);					\
-		    if (l) {						\
-			n -= 32 - ((x) & 0x1f);				\
-			if (n < 0) {					\
-			    n = 0;					\
-			    l &= r;					\
-			    r = 0;					\
-			}						\
-		    }							\
-		    n >>= 5;						\
-		}
-		
-		uint32_t  *a = line;
-		uint32_t  startmask;
-		uint32_t  endmask;
-		int	    nmiddle;
-		int	    width = rxi - lxi;
-		int	    x = lxi;
-		
-		a += x >> 5;
-		x &= 0x1f;
-		
-		MASK_BITS (x, width, startmask, nmiddle, endmask);
-
-		if (startmask) {
-		    WRITE(image, a, READ(image, a) | startmask);
-		    a++;
-		}
-		while (nmiddle--)
-		    WRITE(image, a++, 0xffffffff);
-		if (endmask)
-		    WRITE(image, a, READ(image, a) | endmask);
-	    }
-#else
-	    {
-		DEFINE_ALPHA(line,lxi);
-		int	    lxs;
-		int     rxs;
-
-		/* Sample coverage for edge pixels */
-		lxs = RENDER_SAMPLES_X (lx, N_BITS);
-		rxs = RENDER_SAMPLES_X (rx, N_BITS);
-
-		/* Add coverage across row */
-		if (lxi == rxi)
-		{
-		    ADD_ALPHA (rxs - lxs);
-		}
-		else
-		{
-		    int	xi;
-
-		    ADD_ALPHA (N_X_FRAC(N_BITS) - lxs);
-		    STEP_ALPHA;
-		    for (xi = lxi + 1; xi < rxi; xi++)
-		    {
-			ADD_ALPHA (N_X_FRAC(N_BITS));
-			STEP_ALPHA;
-		    }
-		    ADD_ALPHA (rxs);
-		}
-	    }
-#endif
-	}
-
-	if (y == b)
-	    break;
-
-#if N_BITS > 1
-	if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS))
-	{
-	    RENDER_EDGE_STEP_SMALL (l);
-	    RENDER_EDGE_STEP_SMALL (r);
-	    y += STEP_Y_SMALL(N_BITS);
-	}
-	else
-#endif
-	{
-	    RENDER_EDGE_STEP_BIG (l);
-	    RENDER_EDGE_STEP_BIG (r);
-	    y += STEP_Y_BIG(N_BITS);
-	    line += stride;
-	}
-    }
-}
-
-#undef rasterize_span
diff --git a/vendor/pixman/pixman/pixman-edge.c b/vendor/pixman/pixman/pixman-edge.c
deleted file mode 100644
index c324cd3d4..000000000
--- a/vendor/pixman/pixman/pixman-edge.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright © 2004 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <string.h>
-
-#include "pixman-private.h"
-#include "pixman-accessor.h"
-
-/*
- * Step across a small sample grid gap
- */
-#define RENDER_EDGE_STEP_SMALL(edge)					\
-    {									\
-	edge->x += edge->stepx_small;					\
-	edge->e += edge->dx_small;					\
-	if (edge->e > 0)						\
-	{								\
-	    edge->e -= edge->dy;					\
-	    edge->x += edge->signdx;					\
-	}								\
-    }
-
-/*
- * Step across a large sample grid gap
- */
-#define RENDER_EDGE_STEP_BIG(edge)					\
-    {									\
-	edge->x += edge->stepx_big;					\
-	edge->e += edge->dx_big;					\
-	if (edge->e > 0)						\
-	{								\
-	    edge->e -= edge->dy;					\
-	    edge->x += edge->signdx;					\
-	}								\
-    }
-
-#ifdef PIXMAN_FB_ACCESSORS
-#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors
-#else
-#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors
-#endif
-
-/*
- * 4 bit alpha
- */
-
-#define N_BITS  4
-#define RASTERIZE_EDGES rasterize_edges_4
-
-#ifndef WORDS_BIGENDIAN
-#define SHIFT_4(o)      ((o) << 2)
-#else
-#define SHIFT_4(o)      ((1 - (o)) << 2)
-#endif
-
-#define GET_4(x, o)      (((x) >> SHIFT_4 (o)) & 0xf)
-#define PUT_4(x, o, v)							\
-    (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o)))
-
-#define DEFINE_ALPHA(line, x)						\
-    uint8_t   *__ap = (uint8_t *) line + ((x) >> 1);			\
-    int __ao = (x) & 1
-
-#define STEP_ALPHA      ((__ap += __ao), (__ao ^= 1))
-
-#define ADD_ALPHA(a)							\
-    {									\
-        uint8_t __o = READ (image, __ap);				\
-        uint8_t __a = (a) + GET_4 (__o, __ao);				\
-        WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \
-    }
-
-#include "pixman-edge-imp.h"
-
-#undef ADD_ALPHA
-#undef STEP_ALPHA
-#undef DEFINE_ALPHA
-#undef RASTERIZE_EDGES
-#undef N_BITS
-
-
-/*
- * 1 bit alpha
- */
-
-#define N_BITS 1
-#define RASTERIZE_EDGES rasterize_edges_1
-
-#include "pixman-edge-imp.h"
-
-#undef RASTERIZE_EDGES
-#undef N_BITS
-
-/*
- * 8 bit alpha
- */
-
-static force_inline uint8_t
-clip255 (int x)
-{
-    if (x > 255)
-	return 255;
-
-    return x;
-}
-
-#define ADD_SATURATE_8(buf, val, length)				\
-    do									\
-    {									\
-        int i__ = (length);						\
-        uint8_t *buf__ = (buf);						\
-        int val__ = (val);						\
-									\
-        while (i__--)							\
-        {								\
-            WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \
-            (buf__)++;							\
-	}								\
-    } while (0)
-
-/*
- * We want to detect the case where we add the same value to a long
- * span of pixels.  The triangles on the end are filled in while we
- * count how many sub-pixel scanlines contribute to the middle section.
- *
- *                 +--------------------------+
- *  fill_height =|   \                      /
- *                     +------------------+
- *                      |================|
- *                   fill_start       fill_end
- */
-static void
-rasterize_edges_8 (pixman_image_t *image,
-                   pixman_edge_t * l,
-                   pixman_edge_t * r,
-                   pixman_fixed_t  t,
-                   pixman_fixed_t  b)
-{
-    pixman_fixed_t y = t;
-    uint32_t  *line;
-    int fill_start = -1, fill_end = -1;
-    int fill_size = 0;
-    uint32_t *buf = (image)->bits.bits;
-    int stride = (image)->bits.rowstride;
-    int width = (image)->bits.width;
-
-    line = buf + pixman_fixed_to_int (y) * stride;
-
-    for (;;)
-    {
-        uint8_t *ap = (uint8_t *) line;
-        pixman_fixed_t lx, rx;
-        int lxi, rxi;
-
-        /* clip X */
-        lx = l->x;
-        if (lx < 0)
-	    lx = 0;
-
-        rx = r->x;
-
-        if (pixman_fixed_to_int (rx) >= width)
-	{
-	    /* Use the last pixel of the scanline, covered 100%.
-	     * We can't use the first pixel following the scanline,
-	     * because accessing it could result in a buffer overrun.
-	     */
-	    rx = pixman_int_to_fixed (width) - 1;
-	}
-
-        /* Skip empty (or backwards) sections */
-        if (rx > lx)
-        {
-            int lxs, rxs;
-
-            /* Find pixel bounds for span. */
-            lxi = pixman_fixed_to_int (lx);
-            rxi = pixman_fixed_to_int (rx);
-
-            /* Sample coverage for edge pixels */
-            lxs = RENDER_SAMPLES_X (lx, 8);
-            rxs = RENDER_SAMPLES_X (rx, 8);
-
-            /* Add coverage across row */
-            if (lxi == rxi)
-            {
-                WRITE (image, ap + lxi,
-		       clip255 (READ (image, ap + lxi) + rxs - lxs));
-	    }
-            else
-            {
-                WRITE (image, ap + lxi,
-		       clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs));
-
-                /* Move forward so that lxi/rxi is the pixel span */
-                lxi++;
-
-                /* Don't bother trying to optimize the fill unless
-		 * the span is longer than 4 pixels. */
-                if (rxi - lxi > 4)
-                {
-                    if (fill_start < 0)
-                    {
-                        fill_start = lxi;
-                        fill_end = rxi;
-                        fill_size++;
-		    }
-                    else
-                    {
-                        if (lxi >= fill_end || rxi < fill_start)
-                        {
-                            /* We're beyond what we saved, just fill it */
-                            ADD_SATURATE_8 (ap + fill_start,
-                                            fill_size * N_X_FRAC (8),
-                                            fill_end - fill_start);
-                            fill_start = lxi;
-                            fill_end = rxi;
-                            fill_size = 1;
-			}
-                        else
-                        {
-                            /* Update fill_start */
-                            if (lxi > fill_start)
-                            {
-                                ADD_SATURATE_8 (ap + fill_start,
-                                                fill_size * N_X_FRAC (8),
-                                                lxi - fill_start);
-                                fill_start = lxi;
-			    }
-                            else if (lxi < fill_start)
-                            {
-                                ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8),
-                                                fill_start - lxi);
-			    }
-
-                            /* Update fill_end */
-                            if (rxi < fill_end)
-                            {
-                                ADD_SATURATE_8 (ap + rxi,
-                                                fill_size * N_X_FRAC (8),
-                                                fill_end - rxi);
-                                fill_end = rxi;
-			    }
-                            else if (fill_end < rxi)
-                            {
-                                ADD_SATURATE_8 (ap + fill_end,
-                                                N_X_FRAC (8),
-                                                rxi - fill_end);
-			    }
-                            fill_size++;
-			}
-		    }
-		}
-                else
-                {
-                    ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi);
-		}
-
-                WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs));
-	    }
-	}
-
-        if (y == b)
-        {
-            /* We're done, make sure we clean up any remaining fill. */
-            if (fill_start != fill_end)
-            {
-                if (fill_size == N_Y_FRAC (8))
-                {
-                    MEMSET_WRAPPED (image, ap + fill_start,
-				    0xff, fill_end - fill_start);
-		}
-                else
-                {
-                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
-                                    fill_end - fill_start);
-		}
-	    }
-            break;
-	}
-
-        if (pixman_fixed_frac (y) != Y_FRAC_LAST (8))
-        {
-            RENDER_EDGE_STEP_SMALL (l);
-            RENDER_EDGE_STEP_SMALL (r);
-            y += STEP_Y_SMALL (8);
-	}
-        else
-        {
-            RENDER_EDGE_STEP_BIG (l);
-            RENDER_EDGE_STEP_BIG (r);
-            y += STEP_Y_BIG (8);
-            if (fill_start != fill_end)
-            {
-                if (fill_size == N_Y_FRAC (8))
-                {
-                    MEMSET_WRAPPED (image, ap + fill_start,
-				    0xff, fill_end - fill_start);
-		}
-                else
-                {
-                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
-                                    fill_end - fill_start);
-		}
-		
-                fill_start = fill_end = -1;
-                fill_size = 0;
-	    }
-	    
-            line += stride;
-	}
-    }
-}
-
-#ifndef PIXMAN_FB_ACCESSORS
-static
-#endif
-void
-PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
-                        pixman_edge_t * l,
-                        pixman_edge_t * r,
-                        pixman_fixed_t  t,
-                        pixman_fixed_t  b)
-{
-    switch (PIXMAN_FORMAT_BPP (image->bits.format))
-    {
-    case 1:
-	rasterize_edges_1 (image, l, r, t, b);
-	break;
-
-    case 4:
-	rasterize_edges_4 (image, l, r, t, b);
-	break;
-
-    case 8:
-	rasterize_edges_8 (image, l, r, t, b);
-	break;
-
-    default:
-        break;
-    }
-}
-
-#ifndef PIXMAN_FB_ACCESSORS
-
-PIXMAN_EXPORT void
-pixman_rasterize_edges (pixman_image_t *image,
-                        pixman_edge_t * l,
-                        pixman_edge_t * r,
-                        pixman_fixed_t  t,
-                        pixman_fixed_t  b)
-{
-    return_if_fail (image->type == BITS);
-    return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A);
-    
-    if (image->bits.read_func || image->bits.write_func)
-	pixman_rasterize_edges_accessors (image, l, r, t, b);
-    else
-	pixman_rasterize_edges_no_accessors (image, l, r, t, b);
-}
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-fast-path.c b/vendor/pixman/pixman/pixman-fast-path.c
deleted file mode 100644
index 4579fce99..000000000
--- a/vendor/pixman/pixman/pixman-fast-path.c
+++ /dev/null
@@ -1,3298 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author:  Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-
-static force_inline uint32_t
-fetch_24 (uint8_t *a)
-{
-    if (((uintptr_t)a) & 1)
-    {
-#ifdef WORDS_BIGENDIAN
-	return (*a << 16) | (*(uint16_t *)(a + 1));
-#else
-	return *a | (*(uint16_t *)(a + 1) << 8);
-#endif
-    }
-    else
-    {
-#ifdef WORDS_BIGENDIAN
-	return (*(uint16_t *)a << 8) | *(a + 2);
-#else
-	return *(uint16_t *)a | (*(a + 2) << 16);
-#endif
-    }
-}
-
-static force_inline void
-store_24 (uint8_t *a,
-          uint32_t v)
-{
-    if (((uintptr_t)a) & 1)
-    {
-#ifdef WORDS_BIGENDIAN
-	*a = (uint8_t) (v >> 16);
-	*(uint16_t *)(a + 1) = (uint16_t) (v);
-#else
-	*a = (uint8_t) (v);
-	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif
-    }
-    else
-    {
-#ifdef WORDS_BIGENDIAN
-	*(uint16_t *)a = (uint16_t)(v >> 8);
-	*(a + 2) = (uint8_t)v;
-#else
-	*(uint16_t *)a = (uint16_t)v;
-	*(a + 2) = (uint8_t)(v >> 16);
-#endif
-    }
-}
-
-static force_inline uint32_t
-over (uint32_t src,
-      uint32_t dest)
-{
-    uint32_t a = ~src >> 24;
-
-    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
-
-    return dest;
-}
-
-static force_inline uint32_t
-in (uint32_t x,
-    uint8_t  y)
-{
-    uint16_t a = y;
-
-    UN8x4_MUL_UN8 (x, a);
-
-    return x;
-}
-
-/*
- * Naming convention:
- *
- *  op_src_mask_dest
- */
-static void
-fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *src, *src_line;
-    uint32_t    *dst, *dst_line;
-    uint8_t     *mask, *mask_line;
-    int src_stride, mask_stride, dst_stride;
-    uint8_t m;
-    uint32_t s, d;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	src = src_line;
-	src_line += src_stride;
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-
-	w = width;
-	while (w--)
-	{
-	    m = *mask++;
-	    if (m)
-	    {
-		s = *src | 0xff000000;
-
-		if (m == 0xff)
-		{
-		    *dst = s;
-		}
-		else
-		{
-		    d = in (s, m);
-		    *dst = over (d, *dst);
-		}
-	    }
-	    src++;
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_in_n_8_8 (pixman_implementation_t *imp,
-                         pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask, m;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint16_t t;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    if (srca == 0xff)
-    {
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    while (w--)
-	    {
-		m = *mask++;
-
-		if (m == 0)
-		    *dst = 0;
-		else if (m != 0xff)
-		    *dst = MUL_UN8 (m, *dst, t);
-
-		dst++;
-	    }
-	}
-    }
-    else
-    {
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    while (w--)
-	    {
-		m = *mask++;
-		m = MUL_UN8 (m, srca, t);
-
-		if (m == 0)
-		    *dst = 0;
-		else if (m != 0xff)
-		    *dst = MUL_UN8 (m, *dst, t);
-
-		dst++;
-	    }
-	}
-    }
-}
-
-static void
-fast_composite_in_8_8 (pixman_implementation_t *imp,
-                       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint8_t s;
-    uint16_t t;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-
-	    if (s == 0)
-		*dst = 0;
-	    else if (s != 0xff)
-		*dst = MUL_UN8 (s, *dst, t);
-
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t    *dst_line, *dst, d;
-    uint8_t     *mask_line, *mask, m;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    m = *mask++;
-	    if (m == 0xff)
-	    {
-		if (srca == 0xff)
-		    *dst = src;
-		else
-		    *dst = over (src, *dst);
-	    }
-	    else if (m)
-	    {
-		d = in (src, m);
-		*dst = over (d, *dst);
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
-				   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, s;
-    uint32_t    *dst_line, *dst, d;
-    uint32_t    *mask_line, *mask, ma;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    ma = *mask++;
-
-	    if (ma)
-	    {
-		d = *dst;
-		s = src;
-
-		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
-
-		*dst = s;
-	    }
-
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-                                    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca, s;
-    uint32_t    *dst_line, *dst, d;
-    uint32_t    *mask_line, *mask, ma;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    ma = *mask++;
-	    if (ma == 0xffffffff)
-	    {
-		if (srca == 0xff)
-		    *dst = src;
-		else
-		    *dst = over (src, *dst);
-	    }
-	    else if (ma)
-	    {
-		d = *dst;
-		s = src;
-
-		UN8x4_MUL_UN8x4 (s, ma);
-		UN8x4_MUL_UN8 (ma, srca);
-		ma = ~ma;
-		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
-		*dst = d;
-	    }
-
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint8_t     *dst_line, *dst;
-    uint32_t d;
-    uint8_t     *mask_line, *mask, m;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    m = *mask++;
-	    if (m == 0xff)
-	    {
-		if (srca == 0xff)
-		{
-		    d = src;
-		}
-		else
-		{
-		    d = fetch_24 (dst);
-		    d = over (src, d);
-		}
-		store_24 (dst, d);
-	    }
-	    else if (m)
-	    {
-		d = over (in (src, m), fetch_24 (dst));
-		store_24 (dst, d);
-	    }
-	    dst += 3;
-	}
-    }
-}
-
-static void
-fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint16_t    *dst_line, *dst;
-    uint32_t d;
-    uint8_t     *mask_line, *mask, m;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    m = *mask++;
-	    if (m == 0xff)
-	    {
-		if (srca == 0xff)
-		{
-		    d = src;
-		}
-		else
-		{
-		    d = *dst;
-		    d = over (src, convert_0565_to_0888 (d));
-		}
-		*dst = convert_8888_to_0565 (d);
-	    }
-	    else if (m)
-	    {
-		d = *dst;
-		d = over (in (src, m), convert_0565_to_0888 (d));
-		*dst = convert_8888_to_0565 (d);
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-                                    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t  src, srca, s;
-    uint16_t  src16;
-    uint16_t *dst_line, *dst;
-    uint32_t  d;
-    uint32_t *mask_line, *mask, ma;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    src16 = convert_8888_to_0565 (src);
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    ma = *mask++;
-	    if (ma == 0xffffffff)
-	    {
-		if (srca == 0xff)
-		{
-		    *dst = src16;
-		}
-		else
-		{
-		    d = *dst;
-		    d = over (src, convert_0565_to_0888 (d));
-		    *dst = convert_8888_to_0565 (d);
-		}
-	    }
-	    else if (ma)
-	    {
-		d = *dst;
-		d = convert_0565_to_0888 (d);
-
-		s = src;
-
-		UN8x4_MUL_UN8x4 (s, ma);
-		UN8x4_MUL_UN8 (ma, srca);
-		ma = ~ma;
-		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
-		*dst = convert_8888_to_0565 (d);
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_over_8888_8888 (pixman_implementation_t *imp,
-                               pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    uint8_t a;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    a = s >> 24;
-	    if (a == 0xff)
-		*dst = s;
-	    else if (s)
-		*dst = over (s, *dst);
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_src_x888_8888 (pixman_implementation_t *imp,
-			      pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	    *dst++ = (*src++) | 0xff000000;
-    }
-}
-
-#if 0
-static void
-fast_composite_over_8888_0888 (pixman_implementation_t *imp,
-			       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint32_t d;
-    uint32_t    *src_line, *src, s;
-    uint8_t a;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    a = s >> 24;
-	    if (a)
-	    {
-		if (a == 0xff)
-		    d = s;
-		else
-		    d = over (s, fetch_24 (dst));
-
-		store_24 (dst, d);
-	    }
-	    dst += 3;
-	}
-    }
-}
-#endif
-
-static void
-fast_composite_over_8888_0565 (pixman_implementation_t *imp,
-                               pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t d;
-    uint32_t    *src_line, *src, s;
-    uint8_t a;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    a = s >> 24;
-	    if (s)
-	    {
-		if (a == 0xff)
-		{
-		    d = s;
-		}
-		else
-		{
-		    d = *dst;
-		    d = over (s, convert_0565_to_0888 (d));
-		}
-		*dst = convert_8888_to_0565 (d);
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_add_8_8 (pixman_implementation_t *imp,
-			pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint8_t s, d;
-    uint16_t t;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    if (s)
-	    {
-		if (s != 0xff)
-		{
-		    d = *dst;
-		    t = d + s;
-		    s = t | (0 - (t >> 8));
-		}
-		*dst = s;
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_add_0565_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t	d;
-    uint16_t    *src_line, *src;
-    uint32_t	s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    if (s)
-	    {
-		d = *dst;
-		s = convert_0565_to_8888 (s);
-		if (d)
-		{
-		    d = convert_0565_to_8888 (d);
-		    UN8x4_ADD_UN8x4 (s, d);
-		}
-		*dst = convert_8888_to_0565 (s);
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_add_8888_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint32_t s, d;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    if (s)
-	    {
-		if (s != 0xffffffff)
-		{
-		    d = *dst;
-		    if (d)
-			UN8x4_ADD_UN8x4 (s, d);
-		}
-		*dst = s;
-	    }
-	    dst++;
-	}
-    }
-}
-
-static void
-fast_composite_add_n_8_8 (pixman_implementation_t *imp,
-			  pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint8_t sa;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-    sa = (src >> 24);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w--)
-	{
-	    uint16_t tmp;
-	    uint16_t a;
-	    uint32_t m, d;
-	    uint32_t r;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    r = ADD_UN8 (m, d, tmp);
-
-	    *dst++ = r;
-	}
-    }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define CREATE_BITMASK(n) (0x80000000 >> (n))
-#define UPDATE_BITMASK(n) ((n) >> 1)
-#else
-#define CREATE_BITMASK(n) (1U << (n))
-#define UPDATE_BITMASK(n) ((n) << 1)
-#endif
-
-#define TEST_BIT(p, n)					\
-    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
-#define SET_BIT(p, n)							\
-    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
-
-static void
-fast_composite_add_1_1 (pixman_implementation_t *imp,
-			pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t     *dst_line, *dst;
-    uint32_t     *src_line, *src;
-    int           dst_stride, src_stride;
-    int32_t       w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
-                           src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
-                           dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    /*
-	     * TODO: improve performance by processing uint32_t data instead
-	     *       of individual bits
-	     */
-	    if (TEST_BIT (src, src_x + w))
-		SET_BIT (dst, dest_x + w);
-	}
-    }
-}
-
-static void
-fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t     src, srca;
-    uint32_t    *dst, *dst_line;
-    uint32_t    *mask, *mask_line;
-    int          mask_stride, dst_stride;
-    uint32_t     bitcache, bitmask;
-    int32_t      w;
-
-    if (width <= 0)
-	return;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
-                           dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
-                           mask_stride, mask_line, 1);
-    mask_line += mask_x >> 5;
-
-    if (srca == 0xff)
-    {
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    bitcache = *mask++;
-	    bitmask = CREATE_BITMASK (mask_x & 31);
-
-	    while (w--)
-	    {
-		if (bitmask == 0)
-		{
-		    bitcache = *mask++;
-		    bitmask = CREATE_BITMASK (0);
-		}
-		if (bitcache & bitmask)
-		    *dst = src;
-		bitmask = UPDATE_BITMASK (bitmask);
-		dst++;
-	    }
-	}
-    }
-    else
-    {
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    bitcache = *mask++;
-	    bitmask = CREATE_BITMASK (mask_x & 31);
-
-	    while (w--)
-	    {
-		if (bitmask == 0)
-		{
-		    bitcache = *mask++;
-		    bitmask = CREATE_BITMASK (0);
-		}
-		if (bitcache & bitmask)
-		    *dst = over (src, *dst);
-		bitmask = UPDATE_BITMASK (bitmask);
-		dst++;
-	    }
-	}
-    }
-}
-
-static void
-fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t     src, srca;
-    uint16_t    *dst, *dst_line;
-    uint32_t    *mask, *mask_line;
-    int          mask_stride, dst_stride;
-    uint32_t     bitcache, bitmask;
-    int32_t      w;
-    uint32_t     d;
-    uint16_t     src565;
-
-    if (width <= 0)
-	return;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
-                           dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
-                           mask_stride, mask_line, 1);
-    mask_line += mask_x >> 5;
-
-    if (srca == 0xff)
-    {
-	src565 = convert_8888_to_0565 (src);
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    bitcache = *mask++;
-	    bitmask = CREATE_BITMASK (mask_x & 31);
-
-	    while (w--)
-	    {
-		if (bitmask == 0)
-		{
-		    bitcache = *mask++;
-		    bitmask = CREATE_BITMASK (0);
-		}
-		if (bitcache & bitmask)
-		    *dst = src565;
-		bitmask = UPDATE_BITMASK (bitmask);
-		dst++;
-	    }
-	}
-    }
-    else
-    {
-	while (height--)
-	{
-	    dst = dst_line;
-	    dst_line += dst_stride;
-	    mask = mask_line;
-	    mask_line += mask_stride;
-	    w = width;
-
-	    bitcache = *mask++;
-	    bitmask = CREATE_BITMASK (mask_x & 31);
-
-	    while (w--)
-	    {
-		if (bitmask == 0)
-		{
-		    bitcache = *mask++;
-		    bitmask = CREATE_BITMASK (0);
-		}
-		if (bitcache & bitmask)
-		{
-		    d = over (src, convert_0565_to_0888 (*dst));
-		    *dst = convert_8888_to_0565 (d);
-		}
-		bitmask = UPDATE_BITMASK (bitmask);
-		dst++;
-	    }
-	}
-    }
-}
-
-/*
- * Simple bitblt
- */
-
-static void
-fast_composite_solid_fill (pixman_implementation_t *imp,
-                           pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (dest_image->bits.format == PIXMAN_a1)
-    {
-	src = src >> 31;
-    }
-    else if (dest_image->bits.format == PIXMAN_a8)
-    {
-	src = src >> 24;
-    }
-    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
-             dest_image->bits.format == PIXMAN_b5g6r5)
-    {
-	src = convert_8888_to_0565 (src);
-    }
-
-    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
-                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
-                 dest_x, dest_y,
-                 width, height,
-                 src);
-}
-
-static void
-fast_composite_src_memcpy (pixman_implementation_t *imp,
-			   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
-    uint32_t n_bytes = width * bpp;
-    int dst_stride, src_stride;
-    uint8_t    *dst;
-    uint8_t    *src;
-
-    src_stride = src_image->bits.rowstride * 4;
-    dst_stride = dest_image->bits.rowstride * 4;
-
-    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
-    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
-
-    while (height--)
-    {
-	memcpy (dst, src, n_bytes);
-
-	dst += dst_stride;
-	src += src_stride;
-    }
-}
-
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-
-#define REPEAT_MIN_WIDTH    32
-
-static void
-fast_composite_tiled_repeat (pixman_implementation_t *imp,
-			     pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    pixman_composite_func_t func;
-    pixman_format_code_t mask_format;
-    uint32_t src_flags, mask_flags;
-    int32_t sx, sy;
-    int32_t width_remain;
-    int32_t num_pixels;
-    int32_t src_width;
-    int32_t i, j;
-    pixman_image_t extended_src_image;
-    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
-    pixman_bool_t need_src_extension;
-    uint32_t *src_line;
-    int32_t src_stride;
-    int32_t src_bpp;
-    pixman_composite_info_t info2 = *info;
-
-    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
-		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-
-    if (mask_image)
-    {
-	mask_format = mask_image->common.extended_format_code;
-	mask_flags = info->mask_flags;
-    }
-    else
-    {
-	mask_format = PIXMAN_null;
-	mask_flags = FAST_PATH_IS_OPAQUE;
-    }
-
-    _pixman_implementation_lookup_composite (
-	imp->toplevel, info->op,
-	src_image->common.extended_format_code, src_flags,
-	mask_format, mask_flags,
-	dest_image->common.extended_format_code, info->dest_flags,
-	&imp, &func);
-
-    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
-
-    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
-	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
-	!src_image->bits.indexed)
-    {
-	sx = src_x;
-	sx = MOD (sx, src_image->bits.width);
-	sx += width;
-	src_width = 0;
-
-	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
-	    src_width += src_image->bits.width;
-
-	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
-
-	/* Initialize/validate stack-allocated temporary image */
-	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
-				 src_width, 1, &extended_src[0], src_stride,
-				 FALSE);
-	_pixman_image_validate (&extended_src_image);
-
-	info2.src_image = &extended_src_image;
-	need_src_extension = TRUE;
-    }
-    else
-    {
-	src_width = src_image->bits.width;
-	need_src_extension = FALSE;
-    }
-
-    sx = src_x;
-    sy = src_y;
-
-    while (--height >= 0)
-    {
-	sx = MOD (sx, src_width);
-	sy = MOD (sy, src_image->bits.height);
-
-	if (need_src_extension)
-	{
-	    if (src_bpp == 32)
-	    {
-		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
-
-		for (i = 0; i < src_width; )
-		{
-		    for (j = 0; j < src_image->bits.width; j++, i++)
-			extended_src[i] = src_line[j];
-		}
-	    }
-	    else if (src_bpp == 16)
-	    {
-		uint16_t *src_line_16;
-
-		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
-				       src_line_16, 1);
-		src_line = (uint32_t*)src_line_16;
-
-		for (i = 0; i < src_width; )
-		{
-		    for (j = 0; j < src_image->bits.width; j++, i++)
-			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
-		}
-	    }
-	    else if (src_bpp == 8)
-	    {
-		uint8_t *src_line_8;
-
-		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
-				       src_line_8, 1);
-		src_line = (uint32_t*)src_line_8;
-
-		for (i = 0; i < src_width; )
-		{
-		    for (j = 0; j < src_image->bits.width; j++, i++)
-			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
-		}
-	    }
-
-	    info2.src_y = 0;
-	}
-	else
-	{
-	    info2.src_y = sy;
-	}
-
-	width_remain = width;
-
-	while (width_remain > 0)
-	{
-	    num_pixels = src_width - sx;
-
-	    if (num_pixels > width_remain)
-		num_pixels = width_remain;
-
-	    info2.src_x = sx;
-	    info2.width = num_pixels;
-	    info2.height = 1;
-
-	    func (imp, &info2);
-
-	    width_remain -= num_pixels;
-	    info2.mask_x += num_pixels;
-	    info2.dest_x += num_pixels;
-	    sx = 0;
-	}
-
-	sx = src_x;
-	sy++;
-	info2.mask_x = info->mask_x;
-	info2.mask_y++;
-	info2.dest_x = info->dest_x;
-	info2.dest_y++;
-    }
-
-    if (need_src_extension)
-	_pixman_image_fini (&extended_src_image);
-}
-
-/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
-static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
-				     const uint16_t * src,
-				     int32_t          w,
-				     pixman_fixed_t   vx,
-				     pixman_fixed_t   unit_x,
-				     pixman_fixed_t   max_vx,
-				     pixman_bool_t    fully_transparent_src)
-{
-    uint16_t tmp1, tmp2, tmp3, tmp4;
-    while ((w -= 4) >= 0)
-    {
-	tmp1 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	tmp2 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	tmp3 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	tmp4 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	*dst++ = tmp1;
-	*dst++ = tmp2;
-	*dst++ = tmp3;
-	*dst++ = tmp4;
-    }
-    if (w & 2)
-    {
-	tmp1 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	tmp2 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	*dst++ = tmp1;
-	*dst++ = tmp2;
-    }
-    if (w & 1)
-	*dst = *(src + pixman_fixed_to_int (vx));
-}
-
-FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
-		       scaled_nearest_scanline_565_565_SRC,
-		       uint16_t, uint16_t, COVER)
-FAST_NEAREST_MAINLOOP (565_565_none_SRC,
-		       scaled_nearest_scanline_565_565_SRC,
-		       uint16_t, uint16_t, NONE)
-FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
-		       scaled_nearest_scanline_565_565_SRC,
-		       uint16_t, uint16_t, PAD)
-
-static force_inline uint32_t
-fetch_nearest (pixman_repeat_t src_repeat,
-	       pixman_format_code_t format,
-	       uint32_t *src, int x, int src_width)
-{
-    if (repeat (src_repeat, &x, src_width))
-    {
-	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
-	    return *(src + x) | 0xff000000;
-	else
-	    return *(src + x);
-    }
-    else
-    {
-	return 0;
-    }
-}
-
-static force_inline void
-combine_over (uint32_t s, uint32_t *dst)
-{
-    if (s)
-    {
-	uint8_t ia = 0xff - (s >> 24);
-
-	if (ia)
-	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
-	else
-	    *dst = s;
-    }
-}
-
-static force_inline void
-combine_src (uint32_t s, uint32_t *dst)
-{
-    *dst = s;
-}
-
-static void
-fast_composite_scaled_nearest (pixman_implementation_t *imp,
-			       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t       *dst_line;
-    uint32_t       *src_line;
-    int             dst_stride, src_stride;
-    int		    src_width, src_height;
-    pixman_repeat_t src_repeat;
-    pixman_fixed_t unit_x, unit_y;
-    pixman_format_code_t src_format;
-    pixman_vector_t v;
-    pixman_fixed_t vy;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
-     * transformed from destination space to source space
-     */
-    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (src_image->common.transform, &v))
-	return;
-
-    unit_x = src_image->common.transform->matrix[0][0];
-    unit_y = src_image->common.transform->matrix[1][1];
-
-    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-    v.vector[0] -= pixman_fixed_e;
-    v.vector[1] -= pixman_fixed_e;
-
-    src_height = src_image->bits.height;
-    src_width = src_image->bits.width;
-    src_repeat = src_image->common.repeat;
-    src_format = src_image->bits.format;
-
-    vy = v.vector[1];
-    while (height--)
-    {
-        pixman_fixed_t vx = v.vector[0];
-	int y = pixman_fixed_to_int (vy);
-	uint32_t *dst = dst_line;
-
-	dst_line += dst_stride;
-
-        /* adjust the y location by a unit vector in the y direction
-         * this is equivalent to transforming y+1 of the destination point to source space */
-        vy += unit_y;
-
-	if (!repeat (src_repeat, &y, src_height))
-	{
-	    if (op == PIXMAN_OP_SRC)
-		memset (dst, 0, sizeof (*dst) * width);
-	}
-	else
-	{
-	    int w = width;
-
-	    uint32_t *src = src_line + y * src_stride;
-
-	    while (w >= 2)
-	    {
-		uint32_t s1, s2;
-		int x1, x2;
-
-		x1 = pixman_fixed_to_int (vx);
-		vx += unit_x;
-
-		x2 = pixman_fixed_to_int (vx);
-		vx += unit_x;
-
-		w -= 2;
-
-		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
-		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
-
-		if (op == PIXMAN_OP_OVER)
-		{
-		    combine_over (s1, dst++);
-		    combine_over (s2, dst++);
-		}
-		else
-		{
-		    combine_src (s1, dst++);
-		    combine_src (s2, dst++);
-		}
-	    }
-
-	    while (w--)
-	    {
-		uint32_t s;
-		int x;
-
-		x = pixman_fixed_to_int (vx);
-		vx += unit_x;
-
-		s = fetch_nearest (src_repeat, src_format, src, x, src_width);
-
-		if (op == PIXMAN_OP_OVER)
-		    combine_over (s, dst++);
-		else
-		    combine_src (s, dst++);
-	    }
-	}
-    }
-}
-
-#define CACHE_LINE_SIZE 64
-
-#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
-                                                                              \
-static void                                                                   \
-blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
-				 int             dst_stride,                  \
-				 const pix_type *src,                         \
-				 int             src_stride,                  \
-				 int             w,                           \
-				 int             h)                           \
-{                                                                             \
-    int x, y;                                                                 \
-    for (y = 0; y < h; y++)                                                   \
-    {                                                                         \
-	const pix_type *s = src + (h - y - 1);                                \
-	pix_type *d = dst + dst_stride * y;                                   \
-	for (x = 0; x < w; x++)                                               \
-	{                                                                     \
-	    *d++ = *s;                                                        \
-	    s += src_stride;                                                  \
-	}                                                                     \
-    }                                                                         \
-}                                                                             \
-                                                                              \
-static void                                                                   \
-blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
-				  int             dst_stride,                 \
-				  const pix_type *src,                        \
-				  int             src_stride,                 \
-				  int             w,                          \
-				  int             h)                          \
-{                                                                             \
-    int x, y;                                                                 \
-    for (y = 0; y < h; y++)                                                   \
-    {                                                                         \
-	const pix_type *s = src + src_stride * (w - 1) + y;                   \
-	pix_type *d = dst + dst_stride * y;                                   \
-	for (x = 0; x < w; x++)                                               \
-	{                                                                     \
-	    *d++ = *s;                                                        \
-	    s -= src_stride;                                                  \
-	}                                                                     \
-    }                                                                         \
-}                                                                             \
-                                                                              \
-static void                                                                   \
-blt_rotated_90_##suffix (pix_type       *dst,                                 \
-			 int             dst_stride,                          \
-			 const pix_type *src,                                 \
-			 int             src_stride,                          \
-			 int             W,                                   \
-			 int             H)                                   \
-{                                                                             \
-    int x;                                                                    \
-    int leading_pixels = 0, trailing_pixels = 0;                              \
-    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
-                                                                              \
-    /*                                                                        \
-     * split processing into handling destination as TILE_SIZExH cache line   \
-     * aligned vertical stripes (optimistically assuming that destination     \
-     * stride is a multiple of cache line, if not - it will be just a bit     \
-     * slower)                                                                \
-     */                                                                       \
-                                                                              \
-    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
-    {                                                                         \
-	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
-			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
-	if (leading_pixels > W)                                               \
-	    leading_pixels = W;                                               \
-                                                                              \
-	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
-	blt_rotated_90_trivial_##suffix (                                     \
-	    dst,                                                              \
-	    dst_stride,                                                       \
-	    src,                                                              \
-	    src_stride,                                                       \
-	    leading_pixels,                                                   \
-	    H);                                                               \
-	                                                                      \
-	dst += leading_pixels;                                                \
-	src += leading_pixels * src_stride;                                   \
-	W -= leading_pixels;                                                  \
-    }                                                                         \
-                                                                              \
-    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
-    {                                                                         \
-	trailing_pixels = (((uintptr_t)(dst + W) &                            \
-			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
-	if (trailing_pixels > W)                                              \
-	    trailing_pixels = W;                                              \
-	W -= trailing_pixels;                                                 \
-    }                                                                         \
-                                                                              \
-    for (x = 0; x < W; x += TILE_SIZE)                                        \
-    {                                                                         \
-	/* aligned middle part TILE_SIZExH */                                 \
-	blt_rotated_90_trivial_##suffix (                                     \
-	    dst + x,                                                          \
-	    dst_stride,                                                       \
-	    src + src_stride * x,                                             \
-	    src_stride,                                                       \
-	    TILE_SIZE,                                                        \
-	    H);                                                               \
-    }                                                                         \
-                                                                              \
-    if (trailing_pixels)                                                      \
-    {                                                                         \
-	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
-	blt_rotated_90_trivial_##suffix (                                     \
-	    dst + W,                                                          \
-	    dst_stride,                                                       \
-	    src + W * src_stride,                                             \
-	    src_stride,                                                       \
-	    trailing_pixels,                                                  \
-	    H);                                                               \
-    }                                                                         \
-}                                                                             \
-                                                                              \
-static void                                                                   \
-blt_rotated_270_##suffix (pix_type       *dst,                                \
-			  int             dst_stride,                         \
-			  const pix_type *src,                                \
-			  int             src_stride,                         \
-			  int             W,                                  \
-			  int             H)                                  \
-{                                                                             \
-    int x;                                                                    \
-    int leading_pixels = 0, trailing_pixels = 0;                              \
-    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
-                                                                              \
-    /*                                                                        \
-     * split processing into handling destination as TILE_SIZExH cache line   \
-     * aligned vertical stripes (optimistically assuming that destination     \
-     * stride is a multiple of cache line, if not - it will be just a bit     \
-     * slower)                                                                \
-     */                                                                       \
-                                                                              \
-    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
-    {                                                                         \
-	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
-			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
-	if (leading_pixels > W)                                               \
-	    leading_pixels = W;                                               \
-                                                                              \
-	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
-	blt_rotated_270_trivial_##suffix (                                    \
-	    dst,                                                              \
-	    dst_stride,                                                       \
-	    src + src_stride * (W - leading_pixels),                          \
-	    src_stride,                                                       \
-	    leading_pixels,                                                   \
-	    H);                                                               \
-	                                                                      \
-	dst += leading_pixels;                                                \
-	W -= leading_pixels;                                                  \
-    }                                                                         \
-                                                                              \
-    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
-    {                                                                         \
-	trailing_pixels = (((uintptr_t)(dst + W) &                            \
-			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
-	if (trailing_pixels > W)                                              \
-	    trailing_pixels = W;                                              \
-	W -= trailing_pixels;                                                 \
-	src += trailing_pixels * src_stride;                                  \
-    }                                                                         \
-                                                                              \
-    for (x = 0; x < W; x += TILE_SIZE)                                        \
-    {                                                                         \
-	/* aligned middle part TILE_SIZExH */                                 \
-	blt_rotated_270_trivial_##suffix (                                    \
-	    dst + x,                                                          \
-	    dst_stride,                                                       \
-	    src + src_stride * (W - x - TILE_SIZE),                           \
-	    src_stride,                                                       \
-	    TILE_SIZE,                                                        \
-	    H);                                                               \
-    }                                                                         \
-                                                                              \
-    if (trailing_pixels)                                                      \
-    {                                                                         \
-	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
-	blt_rotated_270_trivial_##suffix (                                    \
-	    dst + W,                                                          \
-	    dst_stride,                                                       \
-	    src - trailing_pixels * src_stride,                               \
-	    src_stride,                                                       \
-	    trailing_pixels,                                                  \
-	    H);                                                               \
-    }                                                                         \
-}                                                                             \
-                                                                              \
-static void                                                                   \
-fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
-				   pixman_composite_info_t *info)	      \
-{									      \
-    PIXMAN_COMPOSITE_ARGS (info);					      \
-    pix_type       *dst_line;						      \
-    pix_type       *src_line;                                                 \
-    int             dst_stride, src_stride;                                   \
-    int             src_x_t, src_y_t;                                         \
-                                                                              \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
-			   dst_stride, dst_line, 1);                          \
-    src_x_t = -src_y + pixman_fixed_to_int (                                  \
-				src_image->common.transform->matrix[0][2] +   \
-				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
-    src_y_t = src_x + pixman_fixed_to_int (                                   \
-				src_image->common.transform->matrix[1][2] +   \
-				pixman_fixed_1 / 2 - pixman_fixed_e);         \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
-			   src_stride, src_line, 1);                          \
-    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
-			     width, height);                                  \
-}                                                                             \
-                                                                              \
-static void                                                                   \
-fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
-				    pixman_composite_info_t *info)            \
-{                                                                             \
-    PIXMAN_COMPOSITE_ARGS (info);					      \
-    pix_type       *dst_line;						      \
-    pix_type       *src_line;                                                 \
-    int             dst_stride, src_stride;                                   \
-    int             src_x_t, src_y_t;                                         \
-                                                                              \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
-			   dst_stride, dst_line, 1);                          \
-    src_x_t = src_y + pixman_fixed_to_int (                                   \
-				src_image->common.transform->matrix[0][2] +   \
-				pixman_fixed_1 / 2 - pixman_fixed_e);         \
-    src_y_t = -src_x + pixman_fixed_to_int (                                  \
-				src_image->common.transform->matrix[1][2] +   \
-				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
-			   src_stride, src_line, 1);                          \
-    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
-			      width, height);                                 \
-}
-
-FAST_SIMPLE_ROTATE (8, uint8_t)
-FAST_SIMPLE_ROTATE (565, uint16_t)
-FAST_SIMPLE_ROTATE (8888, uint32_t)
-
-static const pixman_fast_path_t c_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
-    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
-    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
-    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
-    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
-    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
-
-    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
-
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
-
-#define NEAREST_FAST_PATH(op,s,d)		\
-    {   PIXMAN_OP_ ## op,			\
-	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
-	PIXMAN_null, 0,				\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
-	fast_composite_scaled_nearest,		\
-    }
-
-    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
-    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
-    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
-    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
-
-    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
-    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
-    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
-    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
-
-    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
-    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
-    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
-    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
-
-    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
-    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
-    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
-    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
-
-#define SIMPLE_ROTATE_FLAGS(angle)					  \
-    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
-     FAST_PATH_NEAREST_FILTER			|			  \
-     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
-     FAST_PATH_STANDARD_FLAGS)
-
-#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
-    {   PIXMAN_OP_ ## op,						  \
-	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
-	PIXMAN_null, 0,							  \
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
-	fast_composite_rotate_90_##suffix,				  \
-    },									  \
-    {   PIXMAN_OP_ ## op,						  \
-	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
-	PIXMAN_null, 0,							  \
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
-	fast_composite_rotate_270_##suffix,				  \
-    }
-
-    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
-    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
-    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
-    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
-    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
-
-    /* Simple repeat fast path entry. */
-    {	PIXMAN_OP_any,
-	PIXMAN_any,
-	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
-	 FAST_PATH_NORMAL_REPEAT),
-	PIXMAN_any, 0,
-	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
-	fast_composite_tiled_repeat
-    },
-
-    {   PIXMAN_OP_NONE	},
-};
-
-#ifdef WORDS_BIGENDIAN
-#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
-#else
-#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
-#endif
-
-static force_inline void
-pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
-{
-    if (offs)
-    {
-	int leading_pixels = 32 - offs;
-	if (leading_pixels >= width)
-	{
-	    if (v)
-		*dst |= A1_FILL_MASK (width, offs);
-	    else
-		*dst &= ~A1_FILL_MASK (width, offs);
-	    return;
-	}
-	else
-	{
-	    if (v)
-		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
-	    else
-		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
-	    width -= leading_pixels;
-	}
-    }
-    while (width >= 32)
-    {
-	if (v)
-	    *dst++ = 0xFFFFFFFF;
-	else
-	    *dst++ = 0;
-	width -= 32;
-    }
-    if (width > 0)
-    {
-	if (v)
-	    *dst |= A1_FILL_MASK (width, 0);
-	else
-	    *dst &= ~A1_FILL_MASK (width, 0);
-    }
-}
-
-static void
-pixman_fill1 (uint32_t *bits,
-              int       stride,
-              int       x,
-              int       y,
-              int       width,
-              int       height,
-              uint32_t  filler)
-{
-    uint32_t *dst = bits + y * stride + (x >> 5);
-    int offs = x & 31;
-
-    if (filler & 1)
-    {
-	while (height--)
-	{
-	    pixman_fill1_line (dst, offs, width, 1);
-	    dst += stride;
-	}
-    }
-    else
-    {
-	while (height--)
-	{
-	    pixman_fill1_line (dst, offs, width, 0);
-	    dst += stride;
-	}
-    }
-}
-
-static void
-pixman_fill8 (uint32_t *bits,
-              int       stride,
-              int       x,
-              int       y,
-              int       width,
-              int       height,
-              uint32_t  filler)
-{
-    int byte_stride = stride * (int) sizeof (uint32_t);
-    uint8_t *dst = (uint8_t *) bits;
-    uint8_t v = filler & 0xff;
-    int i;
-
-    dst = dst + y * byte_stride + x;
-
-    while (height--)
-    {
-	for (i = 0; i < width; ++i)
-	    dst[i] = v;
-
-	dst += byte_stride;
-    }
-}
-
-static void
-pixman_fill16 (uint32_t *bits,
-               int       stride,
-               int       x,
-               int       y,
-               int       width,
-               int       height,
-               uint32_t  filler)
-{
-    int short_stride =
-	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
-    uint16_t *dst = (uint16_t *)bits;
-    uint16_t v = filler & 0xffff;
-    int i;
-
-    dst = dst + y * short_stride + x;
-
-    while (height--)
-    {
-	for (i = 0; i < width; ++i)
-	    dst[i] = v;
-
-	dst += short_stride;
-    }
-}
-
-static void
-pixman_fill32 (uint32_t *bits,
-               int       stride,
-               int       x,
-               int       y,
-               int       width,
-               int       height,
-               uint32_t  filler)
-{
-    int i;
-
-    bits = bits + y * stride + x;
-
-    while (height--)
-    {
-	for (i = 0; i < width; ++i)
-	    bits[i] = filler;
-
-	bits += stride;
-    }
-}
-
-static pixman_bool_t
-fast_path_fill (pixman_implementation_t *imp,
-                uint32_t *               bits,
-                int                      stride,
-                int                      bpp,
-                int                      x,
-                int                      y,
-                int                      width,
-                int                      height,
-                uint32_t		 filler)
-{
-    switch (bpp)
-    {
-    case 1:
-	pixman_fill1 (bits, stride, x, y, width, height, filler);
-	break;
-
-    case 8:
-	pixman_fill8 (bits, stride, x, y, width, height, filler);
-	break;
-
-    case 16:
-	pixman_fill16 (bits, stride, x, y, width, height, filler);
-	break;
-
-    case 32:
-	pixman_fill32 (bits, stride, x, y, width, height, filler);
-	break;
-
-    default:
-	return FALSE;
-    }
-
-    return TRUE;
-}
-
-/*****************************************************************************/
-
-static uint32_t *
-fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int32_t w = iter->width;
-    uint32_t *dst = iter->buffer;
-    const uint16_t *src = (const uint16_t *)iter->bits;
-
-    iter->bits += iter->stride;
-
-    /* Align the source buffer at 4 bytes boundary */
-    if (w > 0 && ((uintptr_t)src & 3))
-    {
-	*dst++ = convert_0565_to_8888 (*src++);
-	w--;
-    }
-    /* Process two pixels per iteration */
-    while ((w -= 2) >= 0)
-    {
-	uint32_t sr, sb, sg, t0, t1;
-	uint32_t s = *(const uint32_t *)src;
-	src += 2;
-	sr = (s >> 8) & 0x00F800F8;
-	sb = (s << 3) & 0x00F800F8;
-	sg = (s >> 3) & 0x00FC00FC;
-	sr |= sr >> 5;
-	sb |= sb >> 5;
-	sg |= sg >> 6;
-	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
-	     (sb & 0xFF) | 0xFF000000;
-	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
-	     (sb >> 16) | 0xFF000000;
-#ifdef WORDS_BIGENDIAN
-	*dst++ = t1;
-	*dst++ = t0;
-#else
-	*dst++ = t0;
-	*dst++ = t1;
-#endif
-    }
-    if (w & 1)
-    {
-	*dst = convert_0565_to_8888 (*src);
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
-{
-    iter->bits += iter->stride;
-    return iter->buffer;
-}
-
-/* Helper function for a workaround, which tries to ensure that 0x1F001F
- * constant is always allocated in a register on RISC architectures.
- */
-static force_inline uint32_t
-convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
-{
-    uint32_t a, b;
-    a = (s >> 3) & x1F001F;
-    b = s & 0xFC00;
-    a |= a >> 5;
-    a |= b >> 5;
-    return a;
-}
-
-static void
-fast_write_back_r5g6b5 (pixman_iter_t *iter)
-{
-    int32_t w = iter->width;
-    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
-    const uint32_t *src = iter->buffer;
-    /* Workaround to ensure that x1F001F variable is allocated in a register */
-    static volatile uint32_t volatile_x1F001F = 0x1F001F;
-    uint32_t x1F001F = volatile_x1F001F;
-
-    while ((w -= 4) >= 0)
-    {
-	uint32_t s1 = *src++;
-	uint32_t s2 = *src++;
-	uint32_t s3 = *src++;
-	uint32_t s4 = *src++;
-	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
-	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
-	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
-	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
-    }
-    if (w & 2)
-    {
-	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
-	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
-    }
-    if (w & 1)
-    {
-	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
-    }
-}
-
-typedef struct
-{
-    int		y;
-    uint64_t *	buffer;
-} line_t;
-
-typedef struct
-{
-    line_t		lines[2];
-    pixman_fixed_t	y;
-    pixman_fixed_t	x;
-    uint64_t		data[1];
-} bilinear_info_t;
-
-static void
-fetch_horizontal (bits_image_t *image, line_t *line,
-		  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
-{
-    uint32_t *bits = image->bits + y * image->rowstride;
-    int i;
-
-    for (i = 0; i < n; ++i)
-    {
-	int x0 = pixman_fixed_to_int (x);
-	int x1 = x0 + 1;
-	int32_t dist_x;
-
-	uint32_t left = *(bits + x0);
-	uint32_t right = *(bits + x1);
-
-	dist_x = pixman_fixed_to_bilinear_weight (x);
-	dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);
-
-#if SIZEOF_LONG <= 4
-	{
-	    uint32_t lag, rag, ag;
-	    uint32_t lrb, rrb, rb;
-
-	    lag = (left & 0xff00ff00) >> 8;
-	    rag = (right & 0xff00ff00) >> 8;
-	    ag = (lag << 8) + dist_x * (rag - lag);
-
-	    lrb = (left & 0x00ff00ff);
-	    rrb = (right & 0x00ff00ff);
-	    rb = (lrb << 8) + dist_x * (rrb - lrb);
-
-	    *((uint32_t *)(line->buffer + i)) = ag;
-	    *((uint32_t *)(line->buffer + i) + 1) = rb;
-	}
-#else
-	{
-	    uint64_t lagrb, ragrb;
-	    uint32_t lag, rag;
-	    uint32_t lrb, rrb;
-
-	    lag = (left & 0xff00ff00);
-	    lrb = (left & 0x00ff00ff);
-	    rag = (right & 0xff00ff00);
-	    rrb = (right & 0x00ff00ff);
-	    lagrb = (((uint64_t)lag) << 24) | lrb;
-	    ragrb = (((uint64_t)rag) << 24) | rrb;
-
-	    line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
-	}
-#endif
-
-	x += ux;
-    }
-
-    line->y = y;
-}
-
-static uint32_t *
-fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
-{
-    pixman_fixed_t fx, ux;
-    bilinear_info_t *info = iter->data;
-    line_t *line0, *line1;
-    int y0, y1;
-    int32_t dist_y;
-    int i;
-
-    COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8);
-
-    fx = info->x;
-    ux = iter->image->common.transform->matrix[0][0];
-
-    y0 = pixman_fixed_to_int (info->y);
-    y1 = y0 + 1;
-    dist_y = pixman_fixed_to_bilinear_weight (info->y);
-    dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);
-
-    line0 = &info->lines[y0 & 0x01];
-    line1 = &info->lines[y1 & 0x01];
-
-    if (line0->y != y0)
-    {
-	fetch_horizontal (
-	    &iter->image->bits, line0, y0, fx, ux, iter->width);
-    }
-
-    if (line1->y != y1)
-    {
-	fetch_horizontal (
-	    &iter->image->bits, line1, y1, fx, ux, iter->width);
-    }
-
-    for (i = 0; i < iter->width; ++i)
-    {
-#if SIZEOF_LONG <= 4
-	uint32_t ta, tr, tg, tb;
-	uint32_t ba, br, bg, bb;
-	uint32_t tag, trb;
-	uint32_t bag, brb;
-	uint32_t a, r, g, b;
-
-	tag = *((uint32_t *)(line0->buffer + i));
-	trb = *((uint32_t *)(line0->buffer + i) + 1);
-	bag = *((uint32_t *)(line1->buffer + i));
-	brb = *((uint32_t *)(line1->buffer + i) + 1);
-
-	ta = tag >> 16;
-	ba = bag >> 16;
-	a = (ta << 8) + dist_y * (ba - ta);
-
-	tr = trb >> 16;
-	br = brb >> 16;
-	r = (tr << 8) + dist_y * (br - tr);
-
-	tg = tag & 0xffff;
-	bg = bag & 0xffff;
-	g = (tg << 8) + dist_y * (bg - tg);
-	
-	tb = trb & 0xffff;
-	bb = brb & 0xffff;
-	b = (tb << 8) + dist_y * (bb - tb);
-
-	a = (a <<  8) & 0xff000000;
-	r = (r <<  0) & 0x00ff0000;
-	g = (g >>  8) & 0x0000ff00;
-	b = (b >> 16) & 0x000000ff;
-#else
-	uint64_t top = line0->buffer[i];
-	uint64_t bot = line1->buffer[i];
-	uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;
-	uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
-	uint64_t tgb = (top & 0x0000ffff0000ffffULL);
-	uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
-	uint64_t ar, gb;
-	uint32_t a, r, g, b;
-
-	ar = (tar << 8) + dist_y * (bar - tar);
-	gb = (tgb << 8) + dist_y * (bgb - tgb);
-
-	a = ((ar >> 24) & 0xff000000);
-	r = ((ar >>  0) & 0x00ff0000);
-	g = ((gb >> 40) & 0x0000ff00);
-	b = ((gb >> 16) & 0x000000ff);
-#endif
-
-	iter->buffer[i] = a | r | g | b;
-    }
-
-    info->y += iter->image->common.transform->matrix[1][1];
-
-    return iter->buffer;
-}
-
-static void
-bilinear_cover_iter_fini (pixman_iter_t *iter)
-{
-    free (iter->data);
-}
-
-static void
-fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
-{
-    int width = iter->width;
-    bilinear_info_t *info;
-    pixman_vector_t v;
-
-    /* Reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (iter->image->common.transform, &v))
-	goto fail;
-
-    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));
-    if (!info)
-	goto fail;
-
-    info->x = v.vector[0] - pixman_fixed_1 / 2;
-    info->y = v.vector[1] - pixman_fixed_1 / 2;
-
-    /* It is safe to set the y coordinates to -1 initially
-     * because COVER_CLIP_BILINEAR ensures that we will only
-     * be asked to fetch lines in the [0, height) interval
-     */
-    info->lines[0].y = -1;
-    info->lines[0].buffer = &(info->data[0]);
-    info->lines[1].y = -1;
-    info->lines[1].buffer = &(info->data[width]);
-
-    iter->get_scanline = fast_fetch_bilinear_cover;
-    iter->fini = bilinear_cover_iter_fini;
-
-    iter->data = info;
-    return;
-
-fail:
-    /* Something went wrong, either a bad matrix or OOM; in such cases,
-     * we don't guarantee any particular rendering.
-     */
-    _pixman_log_error (
-	FUNC, "Allocation failure or bad matrix, skipping rendering\n");
-    
-    iter->get_scanline = _pixman_iter_get_scanline_noop;
-    iter->fini = NULL;
-}
-
-static uint32_t *
-bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
-					  const uint32_t *mask)
-{
-
-    pixman_image_t * ima = iter->image;
-    int              offset = iter->x;
-    int              line = iter->y++;
-    int              width = iter->width;
-    uint32_t *       buffer = iter->buffer;
-
-    bits_image_t *bits = &ima->bits;
-    pixman_fixed_t x_top, x_bottom, x;
-    pixman_fixed_t ux_top, ux_bottom, ux;
-    pixman_vector_t v;
-    uint32_t top_mask, bottom_mask;
-    uint32_t *top_row;
-    uint32_t *bottom_row;
-    uint32_t *end;
-    uint32_t zero[2] = { 0, 0 };
-    uint32_t one = 1;
-    int y, y1, y2;
-    int disty;
-    int mask_inc;
-    int w;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (bits->common.transform, &v))
-	return iter->buffer;
-
-    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
-    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
-
-    y = v.vector[1] - pixman_fixed_1/2;
-    disty = pixman_fixed_to_bilinear_weight (y);
-
-    /* Load the pointers to the first and second lines from the source
-     * image that bilinear code must read.
-     *
-     * The main trick in this code is about the check if any line are
-     * outside of the image;
-     *
-     * When I realize that a line (any one) is outside, I change
-     * the pointer to a dummy area with zeros. Once I change this, I
-     * must be sure the pointer will not change, so I set the
-     * variables to each pointer increments inside the loop.
-     */
-    y1 = pixman_fixed_to_int (y);
-    y2 = y1 + 1;
-
-    if (y1 < 0 || y1 >= bits->height)
-    {
-	top_row = zero;
-	x_top = 0;
-	ux_top = 0;
-    }
-    else
-    {
-	top_row = bits->bits + y1 * bits->rowstride;
-	x_top = x;
-	ux_top = ux;
-    }
-
-    if (y2 < 0 || y2 >= bits->height)
-    {
-	bottom_row = zero;
-	x_bottom = 0;
-	ux_bottom = 0;
-    }
-    else
-    {
-	bottom_row = bits->bits + y2 * bits->rowstride;
-	x_bottom = x;
-	ux_bottom = ux;
-    }
-
-    /* Instead of checking whether the operation uses the mast in
-     * each loop iteration, verify this only once and prepare the
-     * variables to make the code smaller inside the loop.
-     */
-    if (!mask)
-    {
-        mask_inc = 0;
-        mask = &one;
-    }
-    else
-    {
-        /* If have a mask, prepare the variables to check it */
-        mask_inc = 1;
-    }
-
-    /* If both are zero, then the whole thing is zero */
-    if (top_row == zero && bottom_row == zero)
-    {
-	memset (buffer, 0, width * sizeof (uint32_t));
-	return iter->buffer;
-    }
-    else if (bits->format == PIXMAN_x8r8g8b8)
-    {
-	if (top_row == zero)
-	{
-	    top_mask = 0;
-	    bottom_mask = 0xff000000;
-	}
-	else if (bottom_row == zero)
-	{
-	    top_mask = 0xff000000;
-	    bottom_mask = 0;
-	}
-	else
-	{
-	    top_mask = 0xff000000;
-	    bottom_mask = 0xff000000;
-	}
-    }
-    else
-    {
-	top_mask = 0;
-	bottom_mask = 0;
-    }
-
-    end = buffer + width;
-
-    /* Zero fill to the left of the image */
-    while (buffer < end && x < pixman_fixed_minus_1)
-    {
-	*buffer++ = 0;
-	x += ux;
-	x_top += ux_top;
-	x_bottom += ux_bottom;
-	mask += mask_inc;
-    }
-
-    /* Left edge
-     */
-    while (buffer < end && x < 0)
-    {
-	uint32_t tr, br;
-	int32_t distx;
-
-	tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
-	br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
-
-	distx = pixman_fixed_to_bilinear_weight (x);
-
-	*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
-
-	x += ux;
-	x_top += ux_top;
-	x_bottom += ux_bottom;
-	mask += mask_inc;
-    }
-
-    /* Main part */
-    w = pixman_int_to_fixed (bits->width - 1);
-
-    while (buffer < end  &&  x < w)
-    {
-	if (*mask)
-	{
-	    uint32_t tl, tr, bl, br;
-	    int32_t distx;
-
-	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
-	    tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
-	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
-	    br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
-
-	    distx = pixman_fixed_to_bilinear_weight (x);
-
-	    *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
-	}
-
-	buffer++;
-	x += ux;
-	x_top += ux_top;
-	x_bottom += ux_bottom;
-	mask += mask_inc;
-    }
-
-    /* Right Edge */
-    w = pixman_int_to_fixed (bits->width);
-    while (buffer < end  &&  x < w)
-    {
-	if (*mask)
-	{
-	    uint32_t tl, bl;
-	    int32_t distx;
-
-	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
-	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
-
-	    distx = pixman_fixed_to_bilinear_weight (x);
-
-	    *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
-	}
-
-	buffer++;
-	x += ux;
-	x_top += ux_top;
-	x_bottom += ux_bottom;
-	mask += mask_inc;
-    }
-
-    /* Zero fill to the left of the image */
-    while (buffer < end)
-	*buffer++ = 0;
-
-    return iter->buffer;
-}
-
-typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
-
-static force_inline void
-bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
-					       int              offset,
-					       int              line,
-					       int              width,
-					       uint32_t *       buffer,
-					       const uint32_t * mask,
-
-					       convert_pixel_t	convert_pixel,
-					       pixman_format_code_t	format,
-					       pixman_repeat_t	repeat_mode)
-{
-    bits_image_t *bits = &image->bits;
-    pixman_fixed_t *params = image->common.filter_params;
-    int cwidth = pixman_fixed_to_int (params[0]);
-    int cheight = pixman_fixed_to_int (params[1]);
-    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
-    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
-    int x_phase_bits = pixman_fixed_to_int (params[2]);
-    int y_phase_bits = pixman_fixed_to_int (params[3]);
-    int x_phase_shift = 16 - x_phase_bits;
-    int y_phase_shift = 16 - y_phase_bits;
-    pixman_fixed_t vx, vy;
-    pixman_fixed_t ux, uy;
-    pixman_vector_t v;
-    int k;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (image->common.transform, &v))
-	return;
-
-    ux = image->common.transform->matrix[0][0];
-    uy = image->common.transform->matrix[1][0];
-
-    vx = v.vector[0];
-    vy = v.vector[1];
-
-    for (k = 0; k < width; ++k)
-    {
-	pixman_fixed_t *y_params;
-	int satot, srtot, sgtot, sbtot;
-	pixman_fixed_t x, y;
-	int32_t x1, x2, y1, y2;
-	int32_t px, py;
-	int i, j;
-
-	if (mask && !mask[k])
-	    goto next;
-
-	/* Round x and y to the middle of the closest phase before continuing. This
-	 * ensures that the convolution matrix is aligned right, since it was
-	 * positioned relative to a particular phase (and not relative to whatever
-	 * exact fraction we happen to get here).
-	 */
-	x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
-	y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
-
-	px = (x & 0xffff) >> x_phase_shift;
-	py = (y & 0xffff) >> y_phase_shift;
-
-	x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
-	y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
-	x2 = x1 + cwidth;
-	y2 = y1 + cheight;
-
-	satot = srtot = sgtot = sbtot = 0;
-
-	y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
-
-	for (i = y1; i < y2; ++i)
-	{
-	    pixman_fixed_t fy = *y_params++;
-
-	    if (fy)
-	    {
-		pixman_fixed_t *x_params = params + 4 + px * cwidth;
-
-		for (j = x1; j < x2; ++j)
-		{
-		    pixman_fixed_t fx = *x_params++;
-		    int rx = j;
-		    int ry = i;
-		    
-		    if (fx)
-		    {
-			pixman_fixed_t f;
-			uint32_t pixel, mask;
-			uint8_t *row;
-
-			mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
-			if (repeat_mode != PIXMAN_REPEAT_NONE)
-			{
-			    repeat (repeat_mode, &rx, bits->width);
-			    repeat (repeat_mode, &ry, bits->height);
-
-			    row = (uint8_t *)(bits->bits + bits->rowstride * ry);
-			    pixel = convert_pixel (row, rx) | mask;
-			}
-			else
-			{
-			    if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
-			    {
-				pixel = 0;
-			    }
-			    else
-			    {
-				row = (uint8_t *)(bits->bits + bits->rowstride * ry);
-				pixel = convert_pixel (row, rx) | mask;
-			    }
-			}
-
-			f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
-			srtot += (int)RED_8 (pixel) * f;
-			sgtot += (int)GREEN_8 (pixel) * f;
-			sbtot += (int)BLUE_8 (pixel) * f;
-			satot += (int)ALPHA_8 (pixel) * f;
-		    }
-		}
-	    }
-	}
-
-	satot = (satot + 0x8000) >> 16;
-	srtot = (srtot + 0x8000) >> 16;
-	sgtot = (sgtot + 0x8000) >> 16;
-	sbtot = (sbtot + 0x8000) >> 16;
-
-	satot = CLIP (satot, 0, 0xff);
-	srtot = CLIP (srtot, 0, 0xff);
-	sgtot = CLIP (sgtot, 0, 0xff);
-	sbtot = CLIP (sbtot, 0, 0xff);
-
-#ifdef WORDS_BIGENDIAN
-	buffer[k] = (satot << 0) | (srtot << 8) | (sgtot << 16) | (sbtot << 24);
-#else
-	buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
-#endif
-
-    next:
-	vx += ux;
-	vy += uy;
-    }
-}
-
-static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static force_inline void
-bits_image_fetch_bilinear_affine (pixman_image_t * image,
-				  int              offset,
-				  int              line,
-				  int              width,
-				  uint32_t *       buffer,
-				  const uint32_t * mask,
-
-				  convert_pixel_t	convert_pixel,
-				  pixman_format_code_t	format,
-				  pixman_repeat_t	repeat_mode)
-{
-    pixman_fixed_t x, y;
-    pixman_fixed_t ux, uy;
-    pixman_vector_t v;
-    bits_image_t *bits = &image->bits;
-    int i;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (image->common.transform, &v))
-	return;
-
-    ux = image->common.transform->matrix[0][0];
-    uy = image->common.transform->matrix[1][0];
-
-    x = v.vector[0];
-    y = v.vector[1];
-
-    for (i = 0; i < width; ++i)
-    {
-	int x1, y1, x2, y2;
-	uint32_t tl, tr, bl, br;
-	int32_t distx, disty;
-	int width = image->bits.width;
-	int height = image->bits.height;
-	const uint8_t *row1;
-	const uint8_t *row2;
-
-	if (mask && !mask[i])
-	    goto next;
-
-	x1 = x - pixman_fixed_1 / 2;
-	y1 = y - pixman_fixed_1 / 2;
-
-	distx = pixman_fixed_to_bilinear_weight (x1);
-	disty = pixman_fixed_to_bilinear_weight (y1);
-
-	y1 = pixman_fixed_to_int (y1);
-	y2 = y1 + 1;
-	x1 = pixman_fixed_to_int (x1);
-	x2 = x1 + 1;
-
-	if (repeat_mode != PIXMAN_REPEAT_NONE)
-	{
-	    uint32_t mask;
-
-	    mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
-	    repeat (repeat_mode, &x1, width);
-	    repeat (repeat_mode, &y1, height);
-	    repeat (repeat_mode, &x2, width);
-	    repeat (repeat_mode, &y2, height);
-
-	    row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
-	    row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
-
-	    tl = convert_pixel (row1, x1) | mask;
-	    tr = convert_pixel (row1, x2) | mask;
-	    bl = convert_pixel (row2, x1) | mask;
-	    br = convert_pixel (row2, x2) | mask;
-	}
-	else
-	{
-	    uint32_t mask1, mask2;
-	    int bpp;
-
-	    /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
-	     * which means if you use it in expressions, those
-	     * expressions become unsigned themselves. Since
-	     * the variables below can be negative in some cases,
-	     * that will lead to crashes on 64 bit architectures.
-	     *
-	     * So this line makes sure bpp is signed
-	     */
-	    bpp = PIXMAN_FORMAT_BPP (format);
-
-	    if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
-	    {
-		buffer[i] = 0;
-		goto next;
-	    }
-
-	    if (y2 == 0)
-	    {
-		row1 = zero;
-		mask1 = 0;
-	    }
-	    else
-	    {
-		row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
-		row1 += bpp / 8 * x1;
-
-		mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-	    }
-
-	    if (y1 == height - 1)
-	    {
-		row2 = zero;
-		mask2 = 0;
-	    }
-	    else
-	    {
-		row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
-		row2 += bpp / 8 * x1;
-
-		mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-	    }
-
-	    if (x2 == 0)
-	    {
-		tl = 0;
-		bl = 0;
-	    }
-	    else
-	    {
-		tl = convert_pixel (row1, 0) | mask1;
-		bl = convert_pixel (row2, 0) | mask2;
-	    }
-
-	    if (x1 == width - 1)
-	    {
-		tr = 0;
-		br = 0;
-	    }
-	    else
-	    {
-		tr = convert_pixel (row1, 1) | mask1;
-		br = convert_pixel (row2, 1) | mask2;
-	    }
-	}
-
-	buffer[i] = bilinear_interpolation (
-	    tl, tr, bl, br, distx, disty);
-
-    next:
-	x += ux;
-	y += uy;
-    }
-}
-
-static force_inline void
-bits_image_fetch_nearest_affine (pixman_image_t * image,
-				 int              offset,
-				 int              line,
-				 int              width,
-				 uint32_t *       buffer,
-				 const uint32_t * mask,
-				 
-				 convert_pixel_t	convert_pixel,
-				 pixman_format_code_t	format,
-				 pixman_repeat_t	repeat_mode)
-{
-    pixman_fixed_t x, y;
-    pixman_fixed_t ux, uy;
-    pixman_vector_t v;
-    bits_image_t *bits = &image->bits;
-    int i;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (image->common.transform, &v))
-	return;
-
-    ux = image->common.transform->matrix[0][0];
-    uy = image->common.transform->matrix[1][0];
-
-    x = v.vector[0];
-    y = v.vector[1];
-
-    for (i = 0; i < width; ++i)
-    {
-	int width, height, x0, y0;
-	const uint8_t *row;
-
-	if (mask && !mask[i])
-	    goto next;
-	
-	width = image->bits.width;
-	height = image->bits.height;
-	x0 = pixman_fixed_to_int (x - pixman_fixed_e);
-	y0 = pixman_fixed_to_int (y - pixman_fixed_e);
-
-	if (repeat_mode == PIXMAN_REPEAT_NONE &&
-	    (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
-	{
-	    buffer[i] = 0;
-	}
-	else
-	{
-	    uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
-	    if (repeat_mode != PIXMAN_REPEAT_NONE)
-	    {
-		repeat (repeat_mode, &x0, width);
-		repeat (repeat_mode, &y0, height);
-	    }
-
-	    row = (uint8_t *)(bits->bits + bits->rowstride * y0);
-
-	    buffer[i] = convert_pixel (row, x0) | mask;
-	}
-
-    next:
-	x += ux;
-	y += uy;
-    }
-}
-
-static force_inline uint32_t
-convert_a8r8g8b8 (const uint8_t *row, int x)
-{
-    return *(((uint32_t *)row) + x);
-}
-
-static force_inline uint32_t
-convert_x8r8g8b8 (const uint8_t *row, int x)
-{
-    return *(((uint32_t *)row) + x);
-}
-
-static force_inline uint32_t
-convert_a8 (const uint8_t *row, int x)
-{
-    return (uint32_t) *(row + x) << 24;
-}
-
-static force_inline uint32_t
-convert_r5g6b5 (const uint8_t *row, int x)
-{
-    return convert_0565_to_0888 (*((uint16_t *)row + x));
-}
-
-#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode)  \
-    static uint32_t *							\
-    bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t   *iter, \
-							    const uint32_t * mask) \
-    {									\
-	bits_image_fetch_separable_convolution_affine (                 \
-	    iter->image,                                                \
-	    iter->x, iter->y++,                                         \
-	    iter->width,                                                \
-	    iter->buffer, mask,                                         \
-	    convert_ ## format,                                         \
-	    PIXMAN_ ## format,                                          \
-	    repeat_mode);                                               \
-									\
-	return iter->buffer;                                            \
-    }
-
-#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode)		\
-    static uint32_t *							\
-    bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,	\
-					       const uint32_t * mask)	\
-    {									\
-	bits_image_fetch_bilinear_affine (iter->image,			\
-					  iter->x, iter->y++,		\
-					  iter->width,			\
-					  iter->buffer, mask,		\
-					  convert_ ## format,		\
-					  PIXMAN_ ## format,		\
-					  repeat_mode);			\
-	return iter->buffer;						\
-    }
-
-#define MAKE_NEAREST_FETCHER(name, format, repeat_mode)			\
-    static uint32_t *							\
-    bits_image_fetch_nearest_affine_ ## name (pixman_iter_t   *iter,	\
-					      const uint32_t * mask)	\
-    {									\
-	bits_image_fetch_nearest_affine (iter->image,			\
-					 iter->x, iter->y++,		\
-					 iter->width,			\
-					 iter->buffer, mask,		\
-					 convert_ ## format,		\
-					 PIXMAN_ ## format,		\
-					 repeat_mode);			\
-	return iter->buffer;						\
-    }
-
-#define MAKE_FETCHERS(name, format, repeat_mode)			\
-    MAKE_NEAREST_FETCHER (name, format, repeat_mode)			\
-    MAKE_BILINEAR_FETCHER (name, format, repeat_mode)			\
-    MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
-
-MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_a8,           a8,       PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_a8,          a8,       PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_a8,	 a8,       PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_a8,	 a8,       PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
-
-#define IMAGE_FLAGS							\
-    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
-     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
-static const pixman_iter_info_t fast_iters[] = 
-{
-    { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
-      _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
-
-    { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
-      ITER_NARROW | ITER_DEST,
-      _pixman_iter_init_bits_stride,
-      fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
-    
-    { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
-      ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
-      _pixman_iter_init_bits_stride,
-      fast_dest_fetch_noop, fast_write_back_r5g6b5 },
-
-    { PIXMAN_a8r8g8b8,
-      (FAST_PATH_STANDARD_FLAGS			|
-       FAST_PATH_SCALE_TRANSFORM		|
-       FAST_PATH_BILINEAR_FILTER		|
-       FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
-      ITER_NARROW | ITER_SRC,
-      fast_bilinear_cover_iter_init,
-      NULL, NULL
-    },
-
-#define FAST_BILINEAR_FLAGS						\
-    (FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_HAS_TRANSFORM		|				\
-     FAST_PATH_AFFINE_TRANSFORM		|				\
-     FAST_PATH_X_UNIT_POSITIVE		|				\
-     FAST_PATH_Y_UNIT_ZERO		|				\
-     FAST_PATH_NONE_REPEAT		|				\
-     FAST_PATH_BILINEAR_FILTER)
-
-    { PIXMAN_a8r8g8b8,
-      FAST_BILINEAR_FLAGS,
-      ITER_NARROW | ITER_SRC,
-      NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
-    },
-
-    { PIXMAN_x8r8g8b8,
-      FAST_BILINEAR_FLAGS,
-      ITER_NARROW | ITER_SRC,
-      NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
-    },
-
-#define GENERAL_BILINEAR_FLAGS						\
-    (FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_HAS_TRANSFORM		|				\
-     FAST_PATH_AFFINE_TRANSFORM		|				\
-     FAST_PATH_BILINEAR_FILTER)
-
-#define GENERAL_NEAREST_FLAGS						\
-    (FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_HAS_TRANSFORM		|				\
-     FAST_PATH_AFFINE_TRANSFORM		|				\
-     FAST_PATH_NEAREST_FILTER)
-
-#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS				\
-    (FAST_PATH_NO_ALPHA_MAP            |				\
-     FAST_PATH_NO_ACCESSORS            |				\
-     FAST_PATH_HAS_TRANSFORM           |				\
-     FAST_PATH_AFFINE_TRANSFORM        |				\
-     FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
-    
-#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
-    { PIXMAN_ ## format,						\
-      GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
-      ITER_NARROW | ITER_SRC,						\
-      NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \
-    },
-
-#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
-    { PIXMAN_ ## format,						\
-      GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
-      ITER_NARROW | ITER_SRC,						\
-      NULL, bits_image_fetch_bilinear_affine_ ## name, NULL,		\
-    },
-
-#define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
-    { PIXMAN_ ## format,						\
-      GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
-      ITER_NARROW | ITER_SRC,						\
-      NULL, bits_image_fetch_nearest_affine_ ## name, NULL		\
-    },
-
-#define AFFINE_FAST_PATHS(name, format, repeat)				\
-    NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
-    BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
-    SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)
-    
-    AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
-    AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
-    AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
-    AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
-    AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
-    AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
-    AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
-    AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
-    AFFINE_FAST_PATHS (pad_a8, a8, PAD)
-    AFFINE_FAST_PATHS (none_a8, a8, NONE)
-    AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
-    AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
-    AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
-    AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
-    AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
-    AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
-
-    { PIXMAN_null },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
-
-    imp->fill = fast_path_fill;
-    imp->iter_info = fast_iters;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-filter.c b/vendor/pixman/pixman/pixman-filter.c
deleted file mode 100644
index 33327df83..000000000
--- a/vendor/pixman/pixman/pixman-filter.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * Copyright 2012, Red Hat, Inc.
- * Copyright 2012, Soren Sandmann
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Soren Sandmann <soren.sandmann@gmail.com>
- */
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <assert.h>
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-
-typedef double (* kernel_func_t) (double x);
-
-typedef struct
-{
-    pixman_kernel_t	kernel;
-    kernel_func_t	func;
-    double		width;
-} filter_info_t;
-
-static double
-impulse_kernel (double x)
-{
-    return (x == 0.0)? 1.0 : 0.0;
-}
-
-static double
-box_kernel (double x)
-{
-    return 1;
-}
-
-static double
-linear_kernel (double x)
-{
-    return 1 - fabs (x);
-}
-
-static double
-gaussian_kernel (double x)
-{
-#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
-#define SIGMA (SQRT2 / 2.0)
-    
-    return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
-}
-
-static double
-sinc (double x)
-{
-    if (x == 0.0)
-	return 1.0;
-    else
-	return sin (M_PI * x) / (M_PI * x);
-}
-
-static double
-lanczos (double x, int n)
-{
-    return sinc (x) * sinc (x * (1.0 / n));
-}
-
-static double
-lanczos2_kernel (double x)
-{
-    return lanczos (x, 2);
-}
-
-static double
-lanczos3_kernel (double x)
-{
-    return lanczos (x, 3);
-}
-
-static double
-nice_kernel (double x)
-{
-    return lanczos3_kernel (x * 0.75);
-}
-
-static double
-general_cubic (double x, double B, double C)
-{
-    double ax = fabs(x);
-
-    if (ax < 1)
-    {
-	return (((12 - 9 * B - 6 * C) * ax +
-		 (-18 + 12 * B + 6 * C)) * ax * ax +
-		(6 - 2 * B)) / 6;
-    }
-    else if (ax < 2)
-    {
-	return ((((-B - 6 * C) * ax +
-		  (6 * B + 30 * C)) * ax +
-		 (-12 * B - 48 * C)) * ax +
-		(8 * B + 24 * C)) / 6;
-    }
-    else
-    {
-	return 0;
-    }
-}
-
-static double
-cubic_kernel (double x)
-{
-    /* This is the Mitchell-Netravali filter.
-     *
-     * (0.0, 0.5) would give us the Catmull-Rom spline,
-     * but that one seems to be indistinguishable from Lanczos2.
-     */
-    return general_cubic (x, 1/3.0, 1/3.0);
-}
-
-static const filter_info_t filters[] =
-{
-    { PIXMAN_KERNEL_IMPULSE,	        impulse_kernel,   0.0 },
-    { PIXMAN_KERNEL_BOX,	        box_kernel,       1.0 },
-    { PIXMAN_KERNEL_LINEAR,	        linear_kernel,    2.0 },
-    { PIXMAN_KERNEL_CUBIC,		cubic_kernel,     4.0 },
-    { PIXMAN_KERNEL_GAUSSIAN,	        gaussian_kernel,  5.0 },
-    { PIXMAN_KERNEL_LANCZOS2,	        lanczos2_kernel,  4.0 },
-    { PIXMAN_KERNEL_LANCZOS3,	        lanczos3_kernel,  6.0 },
-    { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel,      8.0 },
-};
-
-/* This function scales @kernel2 by @scale, then
- * aligns @x1 in @kernel1 with @x2 in @kernel2 and
- * and integrates the product of the kernels across @width.
- *
- * This function assumes that the intervals are within
- * the kernels in question. E.g., the caller must not
- * try to integrate a linear kernel ouside of [-1:1]
- */
-static double
-integral (pixman_kernel_t kernel1, double x1,
-	  pixman_kernel_t kernel2, double scale, double x2,
-	  double width)
-{
-    if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX)
-    {
-	return width;
-    }
-    /* The LINEAR filter is not differentiable at 0, so if the
-     * integration interval crosses zero, break it into two
-     * separate integrals.
-     */
-    else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
-    {
-	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x1) +
-	    integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
-    }
-    else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0)
-    {
-	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x2) +
-	    integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
-    }
-    else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel2].func (x2 * scale);
-    }
-    else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel1].func (x1);
-    }
-    else
-    {
-	/* Integration via Simpson's rule
-	 * See http://www.intmath.com/integration/6-simpsons-rule.php
-	 * 12 segments (6 cubic approximations) seems to produce best
-	 * result for lanczos3.linear, which was the combination that
-	 * showed the most errors.  This makes sense as the lanczos3
-	 * filter is 6 wide.
-	 */
-#define N_SEGMENTS 12
-#define SAMPLE(a1, a2)							\
-	(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
-	
-	double s = 0.0;
-	double h = width / N_SEGMENTS;
-	int i;
-
-	s = SAMPLE (x1, x2);
-
-	for (i = 1; i < N_SEGMENTS; i += 2)
-	{
-	    double a1 = x1 + h * i;
-	    double a2 = x2 + h * i;
-	    s += 4 * SAMPLE (a1, a2);
-	}
-
-	for (i = 2; i < N_SEGMENTS; i += 2)
-	{
-	    double a1 = x1 + h * i;
-	    double a2 = x2 + h * i;
-	    s += 2 * SAMPLE (a1, a2);
-	}
-
-	s += SAMPLE (x1 + width, x2 + width);
-	
-	return h * s * (1.0 / 3.0);
-    }
-}
-
-static void
-create_1d_filter (int              width,
-		  pixman_kernel_t  reconstruct,
-		  pixman_kernel_t  sample,
-		  double           scale,
-		  int              n_phases,
-		  pixman_fixed_t *pstart,
-		  pixman_fixed_t *pend
-		  )
-{
-    pixman_fixed_t *p = pstart;
-    double step;
-    int i;
-    if(width <= 0) return;
-    step = 1.0 / n_phases;
-
-    for (i = 0; i < n_phases; ++i)
-    {
-        double frac = step / 2.0 + i * step;
-	pixman_fixed_t new_total;
-        int x, x1, x2;
-	double total, e;
-
-	/* Sample convolution of reconstruction and sampling
-	 * filter. See rounding.txt regarding the rounding
-	 * and sample positions.
-	 */
-
-	x1 = ceil (frac - width / 2.0 - 0.5);
-	x2 = x1 + width;
-    assert( p >= pstart && p + (x2 - x1) <= pend ); /* assert validity of the following loop */
-	total = 0;
-        for (x = x1; x < x2; ++x)
-        {
-	    double pos = x + 0.5 - frac;
-	    double rlow = - filters[reconstruct].width / 2.0;
-	    double rhigh = rlow + filters[reconstruct].width;
-	    double slow = pos - scale * filters[sample].width / 2.0;
-	    double shigh = slow + scale * filters[sample].width;
-	    double c = 0.0;
-	    double ilow, ihigh;
-
-	    if (rhigh >= slow && rlow <= shigh)
-	    {
-		ilow = MAX (slow, rlow);
-		ihigh = MIN (shigh, rhigh);
-
-		c = integral (reconstruct, ilow,
-			      sample, 1.0 / scale, ilow - pos,
-			      ihigh - ilow);
-	    }
-
-            *p = (pixman_fixed_t)floor (c * 65536.0 + 0.5);
-	    total += *p;
-	    p++;
-        }
-
-	/* Normalize, with error diffusion */
-	p -= width;
-	assert(p >= pstart && p + (x2 - x1) <= pend); /* assert validity of the following loop */
-
-    total = 65536.0 / total;
-    new_total = 0;
-	e = 0.0;
-	for (x = x1; x < x2; ++x)
-	{
-	    double v = (*p) * total + e;
-	    pixman_fixed_t t = floor (v + 0.5);
-
-	    e = v - t;
-	    new_total += t;
-	    *p++ = t;
-	}
-
-	/* pixman_fixed_e's worth of error may remain; put it
-	 * at the first sample, since that is the only one that
-	 * hasn't had any error diffused into it.
-	 */
-
-	assert(p - width >= pstart && p - width < pend); /* assert... */
-	*(p - width) += pixman_fixed_1 - new_total;
-    }
-}
-
-
-static int
-filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size)
-{
-    return ceil (filters[reconstruct].width + size * filters[sample].width);
-}
-
-#ifdef PIXMAN_GNUPLOT
-
-/* If enable-gnuplot is configured, then you can pipe the output of a
- * pixman-using program to gnuplot and get a continuously-updated plot
- * of the horizontal filter. This works well with demos/scale to test
- * the filter generation.
- *
- * The plot is all the different subposition filters shuffled
- * together. This is misleading in a few cases:
- *
- *  IMPULSE.BOX - goes up and down as the subfilters have different
- *		  numbers of non-zero samples
- *  IMPULSE.TRIANGLE - somewhat crooked for the same reason
- *  1-wide filters - looks triangular, but a 1-wide box would be more
- *		     accurate
- */
-static void
-gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p)
-{
-    double step;
-    int i, j;
-    int first;
-
-    step = 1.0 / n_phases;
-
-    printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n");
-    printf ("plot [x=%g:%g] '-' with linespoints ls 1\n", -width*0.5, width*0.5);
-    /* Print a point at the origin so that y==0 line is included: */
-    printf ("0 0\n\n");
-
-    /* The position of the first sample of the phase corresponding to
-     * frac is given by:
-     * 
-     *     ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
-     * 
-     * We have to find the frac that minimizes this expression.
-     * 
-     * For odd widths, we have
-     * 
-     *     ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
-     *   = ceil (frac) + K - frac
-     *   = 1 + K - frac
-     * 
-     * for some K, so this is minimized when frac is maximized and
-     * strictly growing with frac. So for odd widths, we can simply
-     * start at the last phase and go backwards.
-     * 
-     * For even widths, we have
-     * 
-     *     ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
-     *   = ceil (frac - 0.5) + K - frac
-     * 
-     * The graph for this function (ignoring K) looks like this:
-     * 
-     *        0.5
-     *           |    |\ 
-     *           |    | \ 
-     *           |    |  \ 
-     *         0 |    |   \ 
-     *           |\   |
-     *           | \  |
-     *           |  \ |
-     *      -0.5 |   \|
-     *   ---------------------------------
-     *           0    0.5   1
-     * 
-     * So in this case we need to start with the phase whose frac is
-     * less than, but as close as possible to 0.5, then go backwards
-     * until we hit the first phase, then wrap around to the last
-     * phase and continue backwards.
-     * 
-     * Which phase is as close as possible 0.5? The locations of the
-     * sampling point corresponding to the kth phase is given by
-     * 1/(2 * n_phases) + k / n_phases:
-     * 
-     *         1/(2 * n_phases) + k / n_phases = 0.5
-     *  
-     * from which it follows that
-     * 
-     *         k = (n_phases - 1) / 2
-     * 
-     * rounded down is the phase in question.
-     */
-    if (width & 1)
-	first = n_phases - 1;
-    else
-	first = (n_phases - 1) / 2;
-
-    for (j = 0; j < width; ++j)
-    {
-	for (i = 0; i < n_phases; ++i)
-	{
-	    int phase = first - i;
-	    double frac, pos;
-
-	    if (phase < 0)
-		phase = n_phases + phase;
-
-	    frac = step / 2.0 + phase * step;
-	    pos = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + j;
-
-	    printf ("%g %g\n",
-		    pos,
-		    pixman_fixed_to_double (*(p + phase * width + j)));
-	}
-    }
-
-    printf ("e\n");
-    fflush (stdout);
-}
-
-#endif
-
-/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
- * with the given kernels and scale parameters
- */
-PIXMAN_EXPORT pixman_fixed_t *
-pixman_filter_create_separable_convolution (int             *n_values,
-					    pixman_fixed_t   scale_x,
-					    pixman_fixed_t   scale_y,
-					    pixman_kernel_t  reconstruct_x,
-					    pixman_kernel_t  reconstruct_y,
-					    pixman_kernel_t  sample_x,
-					    pixman_kernel_t  sample_y,
-					    int              subsample_bits_x,
-					    int	             subsample_bits_y)
-{
-    double sx = fabs (pixman_fixed_to_double (scale_x));
-    double sy = fabs (pixman_fixed_to_double (scale_y));
-    pixman_fixed_t *params;
-    int subsample_x, subsample_y;
-    int width, height;
-
-    width = filter_width (reconstruct_x, sample_x, sx);
-    subsample_x = (1 << subsample_bits_x);
-
-    height = filter_width (reconstruct_y, sample_y, sy);
-    subsample_y = (1 << subsample_bits_y);
-
-    *n_values = 4 + width * subsample_x + height * subsample_y;
-    
-    params = malloc (*n_values * sizeof (pixman_fixed_t));
-    if (!params)
-	return NULL;
-
-    params[0] = pixman_int_to_fixed (width);
-    params[1] = pixman_int_to_fixed (height);
-    params[2] = pixman_int_to_fixed (subsample_bits_x);
-    params[3] = pixman_int_to_fixed (subsample_bits_y);
-
-    {
-        pixman_fixed_t
-            *xparams = params+4,
-            *yparams = xparams + width*subsample_x,
-            *endparams = params + *n_values;
-        create_1d_filter(width, reconstruct_x, sample_x, sx, subsample_x,
-                         xparams, yparams);
-        create_1d_filter(height, reconstruct_y, sample_y, sy, subsample_y,
-                         yparams, endparams);
-    }
-
-#ifdef PIXMAN_GNUPLOT
-    gnuplot_filter(width, subsample_x, params + 4);
-#endif
-
-    return params;
-}
diff --git a/vendor/pixman/pixman/pixman-general.c b/vendor/pixman/pixman/pixman-general.c
deleted file mode 100644
index b4450cbec..000000000
--- a/vendor/pixman/pixman/pixman-general.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright © 2009 Red Hat, Inc.
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *             2008 Aaron Plattner, NVIDIA Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pixman-private.h"
-
-static void
-general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-    pixman_image_t *image = iter->image;
-
-    switch (image->type)
-    {
-    case BITS:
-        if ((iter->iter_flags & ITER_SRC) == ITER_SRC)
-            _pixman_bits_image_src_iter_init (image, iter);
-        else
-            _pixman_bits_image_dest_iter_init (image, iter);
-        break;
-
-    case LINEAR:
-        _pixman_linear_gradient_iter_init (image, iter);
-        break;
-
-    case RADIAL:
-	_pixman_radial_gradient_iter_init (image, iter);
-        break;
-
-    case CONICAL:
-	_pixman_conical_gradient_iter_init (image, iter);
-        break;
-
-    case SOLID:
-        _pixman_log_error (FUNC, "Solid image not handled by noop");
-        break;
-
-    default:
-	_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
-        break;
-    }
-}
-
-static const pixman_iter_info_t general_iters[] =
-{
-    { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL },
-    { PIXMAN_null },
-};
-
-typedef struct op_info_t op_info_t;
-struct op_info_t
-{
-    uint8_t src, dst;
-};
-
-#define ITER_IGNORE_BOTH						\
-    (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA)
-
-static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
-{
-    /* Src                   Dst                   */
-    { ITER_IGNORE_BOTH,      ITER_IGNORE_BOTH      }, /* CLEAR */
-    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_BOTH      }, /* SRC */
-    { ITER_IGNORE_BOTH,      ITER_LOCALIZED_ALPHA  }, /* DST */
-    { 0,                     ITER_LOCALIZED_ALPHA  }, /* OVER */
-    { ITER_LOCALIZED_ALPHA,  0                     }, /* OVER_REVERSE */
-    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_RGB       }, /* IN */
-    { ITER_IGNORE_RGB,       ITER_LOCALIZED_ALPHA  }, /* IN_REVERSE */
-    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_RGB       }, /* OUT */
-    { ITER_IGNORE_RGB,       ITER_LOCALIZED_ALPHA  }, /* OUT_REVERSE */
-    { 0,                     0                     }, /* ATOP */
-    { 0,                     0                     }, /* ATOP_REVERSE */
-    { 0,                     0                     }, /* XOR */
-    { ITER_LOCALIZED_ALPHA,  ITER_LOCALIZED_ALPHA  }, /* ADD */
-    { 0,                     0                     }, /* SATURATE */
-};
-
-#define SCANLINE_BUFFER_LENGTH 8192
-
-static pixman_bool_t
-operator_needs_division (pixman_op_t op)
-{
-    static const uint8_t needs_division[] =
-    {
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */
-	0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */
-    };
-
-    return needs_division[op];
-}
-
-static void
-general_composite_rect  (pixman_implementation_t *imp,
-                         pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH];
-    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
-    uint8_t *src_buffer, *mask_buffer, *dest_buffer;
-    pixman_iter_t src_iter, mask_iter, dest_iter;
-    pixman_combine_32_func_t compose;
-    pixman_bool_t component_alpha;
-    iter_flags_t width_flag, src_iter_flags;
-    int Bpp;
-    int i;
-
-    if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT)		     &&
-	(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT)  &&
-	(dest_image->common.flags & FAST_PATH_NARROW_FORMAT)		     &&
-	!(operator_needs_division (op))                                      &&
-	(dest_image->bits.dither == PIXMAN_DITHER_NONE))
-    {
-	width_flag = ITER_NARROW;
-	Bpp = 4;
-    }
-    else
-    {
-	width_flag = ITER_WIDE;
-	Bpp = 16;
-    }
-
-#define ALIGN(addr)							\
-    ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15)))
-
-    if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
-	return;
-
-    if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3)
-    {
-	scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3);
-
-	if (!scanline_buffer)
-	    return;
-
-	memset (scanline_buffer, 0, width * Bpp * 3 + 15 * 3);
-    }
-    else
-    {
-	memset (stack_scanline_buffer, 0, sizeof (stack_scanline_buffer));
-    }
-
-    src_buffer = ALIGN (scanline_buffer);
-    mask_buffer = ALIGN (src_buffer + width * Bpp);
-    dest_buffer = ALIGN (mask_buffer + width * Bpp);
-
-    if (width_flag == ITER_WIDE)
-    {
-	/* To make sure there aren't any NANs in the buffers */
-	memset (src_buffer, 0, width * Bpp);
-	memset (mask_buffer, 0, width * Bpp);
-	memset (dest_buffer, 0, width * Bpp);
-    }
-    
-    /* src iter */
-    src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;
-
-    _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image,
-                                      src_x, src_y, width, height,
-                                      src_buffer, src_iter_flags,
-                                      info->src_flags);
-
-    /* mask iter */
-    if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
-	(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
-    {
-	/* If it doesn't matter what the source is, then it doesn't matter
-	 * what the mask is
-	 */
-	mask_image = NULL;
-    }
-
-    component_alpha = mask_image && mask_image->common.component_alpha;
-
-    _pixman_implementation_iter_init (
-	imp->toplevel, &mask_iter,
-	mask_image, mask_x, mask_y, width, height, mask_buffer,
-	ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
-	info->mask_flags);
-
-    /* dest iter */
-    _pixman_implementation_iter_init (
-	imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
-	dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags);
-
-    compose = _pixman_implementation_lookup_combiner (
-	imp->toplevel, op, component_alpha, width_flag != ITER_WIDE);
-
-    for (i = 0; i < height; ++i)
-    {
-	uint32_t *s, *m, *d;
-
-	m = mask_iter.get_scanline (&mask_iter, NULL);
-	s = src_iter.get_scanline (&src_iter, m);
-	d = dest_iter.get_scanline (&dest_iter, NULL);
-
-	compose (imp->toplevel, op, d, s, m, width);
-
-	dest_iter.write_back (&dest_iter);
-    }
-
-    if (src_iter.fini)
-	src_iter.fini (&src_iter);
-    if (mask_iter.fini)
-	mask_iter.fini (&mask_iter);
-    if (dest_iter.fini)
-	dest_iter.fini (&dest_iter);
-    
-    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
-	free (scanline_buffer);
-}
-
-static const pixman_fast_path_t general_fast_path[] =
-{
-    { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any,	0, PIXMAN_any, 0, general_composite_rect },
-    { PIXMAN_OP_NONE }
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_general (void)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
-
-    _pixman_setup_combiner_functions_32 (imp);
-    _pixman_setup_combiner_functions_float (imp);
-
-    imp->iter_info = general_iters;
-
-    return imp;
-}
-
diff --git a/vendor/pixman/pixman/pixman-glyph.c b/vendor/pixman/pixman/pixman-glyph.c
deleted file mode 100644
index dc9041180..000000000
--- a/vendor/pixman/pixman/pixman-glyph.c
+++ /dev/null
@@ -1,676 +0,0 @@
-/*
- * Copyright 2010, 2012, Soren Sandmann <sandmann@cs.au.dk>
- * Copyright 2010, 2011, 2012, Red Hat, Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Soren Sandmann <sandmann@cs.au.dk>
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-
-#include <stdlib.h>
-
-typedef struct glyph_metrics_t glyph_metrics_t;
-typedef struct glyph_t glyph_t;
-
-#define TOMBSTONE ((glyph_t *)0x1)
-
-/* XXX: These numbers are arbitrary---we've never done any measurements.
- */
-#define N_GLYPHS_HIGH_WATER  (16384)
-#define N_GLYPHS_LOW_WATER   (8192)
-#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER)
-#define HASH_MASK (HASH_SIZE - 1)
-
-struct glyph_t
-{
-    void *		font_key;
-    void *		glyph_key;
-    int			origin_x;
-    int			origin_y;
-    pixman_image_t *	image;
-    pixman_link_t	mru_link;
-};
-
-struct pixman_glyph_cache_t
-{
-    int			n_glyphs;
-    int			n_tombstones;
-    int			freeze_count;
-    pixman_list_t	mru;
-    glyph_t *		glyphs[HASH_SIZE];
-};
-
-static void
-free_glyph (glyph_t *glyph)
-{
-    pixman_list_unlink (&glyph->mru_link);
-    pixman_image_unref (glyph->image);
-    free (glyph);
-}
-
-static unsigned int
-hash (const void *font_key, const void *glyph_key)
-{
-    size_t key = (size_t)font_key + (size_t)glyph_key;
-
-    /* This hash function is based on one found on Thomas Wang's
-     * web page at
-     *
-     *    http://www.concentric.net/~Ttwang/tech/inthash.htm
-     *
-     */
-    key = (key << 15) - key - 1;
-    key = key ^ (key >> 12);
-    key = key + (key << 2);
-    key = key ^ (key >> 4);
-    key = key + (key << 3) + (key << 11);
-    key = key ^ (key >> 16);
-
-    return key;
-}
-
-static glyph_t *
-lookup_glyph (pixman_glyph_cache_t *cache,
-	      void                 *font_key,
-	      void                 *glyph_key)
-{
-    unsigned idx;
-    glyph_t *g;
-
-    idx = hash (font_key, glyph_key);
-    while ((g = cache->glyphs[idx++ & HASH_MASK]))
-    {
-	if (g != TOMBSTONE			&&
-	    g->font_key == font_key		&&
-	    g->glyph_key == glyph_key)
-	{
-	    return g;
-	}
-    }
-
-    return NULL;
-}
-
-static void
-insert_glyph (pixman_glyph_cache_t *cache,
-	      glyph_t              *glyph)
-{
-    unsigned idx;
-    glyph_t **loc;
-
-    idx = hash (glyph->font_key, glyph->glyph_key);
-
-    /* Note: we assume that there is room in the table. If there isn't,
-     * this will be an infinite loop.
-     */
-    do
-    {
-	loc = &cache->glyphs[idx++ & HASH_MASK];
-    } while (*loc && *loc != TOMBSTONE);
-
-    if (*loc == TOMBSTONE)
-	cache->n_tombstones--;
-    cache->n_glyphs++;
-
-    *loc = glyph;
-}
-
-static void
-remove_glyph (pixman_glyph_cache_t *cache,
-	      glyph_t              *glyph)
-{
-    unsigned idx;
-
-    idx = hash (glyph->font_key, glyph->glyph_key);
-    while (cache->glyphs[idx & HASH_MASK] != glyph)
-	idx++;
-
-    cache->glyphs[idx & HASH_MASK] = TOMBSTONE;
-    cache->n_tombstones++;
-    cache->n_glyphs--;
-
-    /* Eliminate tombstones if possible */
-    if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL)
-    {
-	while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE)
-	{
-	    cache->glyphs[idx & HASH_MASK] = NULL;
-	    cache->n_tombstones--;
-	    idx--;
-	}
-    }
-}
-
-static void
-clear_table (pixman_glyph_cache_t *cache)
-{
-    int i;
-
-    for (i = 0; i < HASH_SIZE; ++i)
-    {
-	glyph_t *glyph = cache->glyphs[i];
-
-	if (glyph && glyph != TOMBSTONE)
-	    free_glyph (glyph);
-
-	cache->glyphs[i] = NULL;
-    }
-
-    cache->n_glyphs = 0;
-    cache->n_tombstones = 0;
-}
-
-PIXMAN_EXPORT pixman_glyph_cache_t *
-pixman_glyph_cache_create (void)
-{
-    pixman_glyph_cache_t *cache;
-
-    if (!(cache = malloc (sizeof *cache)))
-	return NULL;
-
-    memset (cache->glyphs, 0, sizeof (cache->glyphs));
-    cache->n_glyphs = 0;
-    cache->n_tombstones = 0;
-    cache->freeze_count = 0;
-
-    pixman_list_init (&cache->mru);
-
-    return cache;
-}
-
-PIXMAN_EXPORT void
-pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache)
-{
-    return_if_fail (cache->freeze_count == 0);
-
-    clear_table (cache);
-
-    free (cache);
-}
-
-PIXMAN_EXPORT void
-pixman_glyph_cache_freeze (pixman_glyph_cache_t  *cache)
-{
-    cache->freeze_count++;
-}
-
-PIXMAN_EXPORT void
-pixman_glyph_cache_thaw (pixman_glyph_cache_t  *cache)
-{
-    if (--cache->freeze_count == 0					&&
-	cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER)
-    {
-	if (cache->n_tombstones > N_GLYPHS_HIGH_WATER)
-	{
-	    /* More than half the entries are
-	     * tombstones. Just dump the whole table.
-	     */
-	    clear_table (cache);
-	}
-
-	while (cache->n_glyphs > N_GLYPHS_LOW_WATER)
-	{
-	    glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail);
-
-	    remove_glyph (cache, glyph);
-	    free_glyph (glyph);
-	}
-    }
-}
-
-PIXMAN_EXPORT const void *
-pixman_glyph_cache_lookup (pixman_glyph_cache_t  *cache,
-			   void                  *font_key,
-			   void                  *glyph_key)
-{
-    return lookup_glyph (cache, font_key, glyph_key);
-}
-
-PIXMAN_EXPORT const void *
-pixman_glyph_cache_insert (pixman_glyph_cache_t  *cache,
-			   void                  *font_key,
-			   void                  *glyph_key,
-			   int			  origin_x,
-			   int                    origin_y,
-			   pixman_image_t        *image)
-{
-    glyph_t *glyph;
-    int32_t width, height;
-
-    return_val_if_fail (cache->freeze_count > 0, NULL);
-    return_val_if_fail (image->type == BITS, NULL);
-
-    width = image->bits.width;
-    height = image->bits.height;
-
-    if (cache->n_glyphs >= HASH_SIZE)
-	return NULL;
-
-    if (!(glyph = malloc (sizeof *glyph)))
-	return NULL;
-
-    glyph->font_key = font_key;
-    glyph->glyph_key = glyph_key;
-    glyph->origin_x = origin_x;
-    glyph->origin_y = origin_y;
-
-    if (!(glyph->image = pixman_image_create_bits (
-	      image->bits.format, width, height, NULL, -1)))
-    {
-	free (glyph);
-	return NULL;
-    }
-
-    pixman_image_composite32 (PIXMAN_OP_SRC,
-			      image, NULL, glyph->image, 0, 0, 0, 0, 0, 0,
-			      width, height);
-
-    if (PIXMAN_FORMAT_A   (glyph->image->bits.format) != 0	&&
-	PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0)
-    {
-	pixman_image_set_component_alpha (glyph->image, TRUE);
-    }
-
-    pixman_list_prepend (&cache->mru, &glyph->mru_link);
-
-    _pixman_image_validate (glyph->image);
-    insert_glyph (cache, glyph);
-
-    return glyph;
-}
-
-PIXMAN_EXPORT void
-pixman_glyph_cache_remove (pixman_glyph_cache_t  *cache,
-			   void                  *font_key,
-			   void                  *glyph_key)
-{
-    glyph_t *glyph;
-
-    if ((glyph = lookup_glyph (cache, font_key, glyph_key)))
-    {
-	remove_glyph (cache, glyph);
-
-	free_glyph (glyph);
-    }
-}
-
-PIXMAN_EXPORT void
-pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
-			  int                   n_glyphs,
-			  pixman_glyph_t       *glyphs,
-			  pixman_box32_t       *extents)
-{
-    int i;
-
-    extents->x1 = extents->y1 = INT32_MAX;
-    extents->x2 = extents->y2 = INT32_MIN;
-
-    for (i = 0; i < n_glyphs; ++i)
-    {
-	glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
-	int x1, y1, x2, y2;
-
-	x1 = glyphs[i].x - glyph->origin_x;
-	y1 = glyphs[i].y - glyph->origin_y;
-	x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width;
-	y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height;
-
-	if (x1 < extents->x1)
-	    extents->x1 = x1;
-	if (y1 < extents->y1)
-	    extents->y1 = y1;
-	if (x2 > extents->x2)
-	    extents->x2 = x2;
-	if (y2 > extents->y2)
-	    extents->y2 = y2;
-    }
-}
-
-/* This function returns a format that is suitable for use as a mask for the
- * set of glyphs in question.
- */
-PIXMAN_EXPORT pixman_format_code_t
-pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
-			      int		    n_glyphs,
-			      const pixman_glyph_t *glyphs)
-{
-    pixman_format_code_t format = PIXMAN_a1;
-    int i;
-
-    for (i = 0; i < n_glyphs; ++i)
-    {
-	const glyph_t *glyph = glyphs[i].glyph;
-	pixman_format_code_t glyph_format = glyph->image->bits.format;
-
-	if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A)
-	{
-	    if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format))
-		format = glyph_format;
-	}
-	else
-	{
-	    return PIXMAN_a8r8g8b8;
-	}
-    }
-
-    return format;
-}
-
-static pixman_bool_t
-box32_intersect (pixman_box32_t *dest,
-		 const pixman_box32_t *box1,
-		 const pixman_box32_t *box2)
-{
-    dest->x1 = MAX (box1->x1, box2->x1);
-    dest->y1 = MAX (box1->y1, box2->y1);
-    dest->x2 = MIN (box1->x2, box2->x2);
-    dest->y2 = MIN (box1->y2, box2->y2);
-
-    return dest->x2 > dest->x1 && dest->y2 > dest->y1;
-}
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-PIXMAN_EXPORT void
-pixman_composite_glyphs_no_mask (pixman_op_t            op,
-				 pixman_image_t        *src,
-				 pixman_image_t        *dest,
-				 int32_t                src_x,
-				 int32_t                src_y,
-				 int32_t                dest_x,
-				 int32_t                dest_y,
-				 pixman_glyph_cache_t  *cache,
-				 int                    n_glyphs,
-				 const pixman_glyph_t  *glyphs)
-{
-    pixman_region32_t region;
-    pixman_format_code_t glyph_format = PIXMAN_null;
-    uint32_t glyph_flags = 0;
-    pixman_format_code_t dest_format;
-    uint32_t dest_flags;
-    pixman_composite_func_t func = NULL;
-    pixman_implementation_t *implementation = NULL;
-    pixman_composite_info_t info;
-    int i;
-
-    _pixman_image_validate (src);
-    _pixman_image_validate (dest);
-    
-    dest_format = dest->common.extended_format_code;
-    dest_flags = dest->common.flags;
-    
-    pixman_region32_init (&region);
-    if (!_pixman_compute_composite_region32 (
-	    &region,
-	    src, NULL, dest,
-	    src_x - dest_x, src_y - dest_y, 0, 0, 0, 0,
-	    dest->bits.width, dest->bits.height))
-    {
-	goto out;
-    }
-
-    info.op = op;
-    info.src_image = src;
-    info.dest_image = dest;
-    info.src_flags = src->common.flags;
-    info.dest_flags = dest->common.flags;
-
-    for (i = 0; i < n_glyphs; ++i)
-    {
-	glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
-	pixman_image_t *glyph_img = glyph->image;
-	pixman_box32_t glyph_box;
-	pixman_box32_t *pbox;
-	uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-	pixman_box32_t composite_box;
-	int n;
-
-	glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x;
-	glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y;
-	glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
-	glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
-	
-	pbox = pixman_region32_rectangles (&region, &n);
-	
-	info.mask_image = glyph_img;
-
-	while (n--)
-	{
-	    if (box32_intersect (&composite_box, pbox, &glyph_box))
-	    {
-		if (glyph_img->common.extended_format_code != glyph_format	||
-		    glyph_img->common.flags != glyph_flags)
-		{
-		    glyph_format = glyph_img->common.extended_format_code;
-		    glyph_flags = glyph_img->common.flags;
-
-		    _pixman_implementation_lookup_composite (
-			get_implementation(), op,
-			src->common.extended_format_code, src->common.flags,
-			glyph_format, glyph_flags | extra,
-			dest_format, dest_flags,
-			&implementation, &func);
-		}
-
-		info.src_x = src_x + composite_box.x1 - dest_x;
-		info.src_y = src_y + composite_box.y1 - dest_y;
-		info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x);
-		info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y);
-		info.dest_x = composite_box.x1;
-		info.dest_y = composite_box.y1;
-		info.width = composite_box.x2 - composite_box.x1;
-		info.height = composite_box.y2 - composite_box.y1;
-
-		info.mask_flags = glyph_flags;
-
-		func (implementation, &info);
-	    }
-
-	    pbox++;
-	}
-	pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
-    }
-
-out:
-    pixman_region32_fini (&region);
-}
-
-static void
-add_glyphs (pixman_glyph_cache_t *cache,
-	    pixman_image_t *dest,
-	    int off_x, int off_y,
-	    int n_glyphs, const pixman_glyph_t *glyphs)
-{
-    pixman_format_code_t glyph_format = PIXMAN_null;
-    uint32_t glyph_flags = 0;
-    pixman_composite_func_t func = NULL;
-    pixman_implementation_t *implementation = NULL;
-    pixman_format_code_t dest_format;
-    uint32_t dest_flags;
-    pixman_box32_t dest_box;
-    pixman_composite_info_t info;
-    pixman_image_t *white_img = NULL;
-    pixman_bool_t white_src = FALSE;
-    int i;
-
-    _pixman_image_validate (dest);
-
-    dest_format = dest->common.extended_format_code;
-    dest_flags = dest->common.flags;
-
-    info.op = PIXMAN_OP_ADD;
-    info.dest_image = dest;
-    info.src_x = 0;
-    info.src_y = 0;
-    info.dest_flags = dest_flags;
-
-    dest_box.x1 = 0;
-    dest_box.y1 = 0;
-    dest_box.x2 = dest->bits.width;
-    dest_box.y2 = dest->bits.height;
-
-    for (i = 0; i < n_glyphs; ++i)
-    {
-	glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
-	pixman_image_t *glyph_img = glyph->image;
-	pixman_box32_t glyph_box;
-	pixman_box32_t composite_box;
-
-	if (glyph_img->common.extended_format_code != glyph_format	||
-	    glyph_img->common.flags != glyph_flags)
-	{
-	    pixman_format_code_t src_format, mask_format;
-
-	    glyph_format = glyph_img->common.extended_format_code;
-	    glyph_flags = glyph_img->common.flags;
-
-	    if (glyph_format == dest->bits.format)
-	    {
-		src_format = glyph_format;
-		mask_format = PIXMAN_null;
-		info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-		info.mask_flags = FAST_PATH_IS_OPAQUE;
-		info.mask_image = NULL;
-		white_src = FALSE;
-	    }
-	    else
-	    {
-		if (!white_img)
-		{
-		    static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff };
-
-		    if (!(white_img = pixman_image_create_solid_fill (&white)))
-			goto out;
-
-		    _pixman_image_validate (white_img);
-		}
-
-		src_format = PIXMAN_solid;
-		mask_format = glyph_format;
-		info.src_flags = white_img->common.flags;
-		info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-		info.src_image = white_img;
-		white_src = TRUE;
-	    }
-
-	    _pixman_implementation_lookup_composite (
-		get_implementation(), PIXMAN_OP_ADD,
-		src_format, info.src_flags,
-		mask_format, info.mask_flags,
-		dest_format, dest_flags,
-		&implementation, &func);
-	}
-
-	glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
-	glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y;
-	glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
-	glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
-	
-	if (box32_intersect (&composite_box, &glyph_box, &dest_box))
-	{
-	    int src_x = composite_box.x1 - glyph_box.x1;
-	    int src_y = composite_box.y1 - glyph_box.y1;
-
-	    if (white_src)
-		info.mask_image = glyph_img;
-	    else
-		info.src_image = glyph_img;
-
-	    info.mask_x = info.src_x = src_x;
-	    info.mask_y = info.src_y = src_y;
-	    info.dest_x = composite_box.x1;
-	    info.dest_y = composite_box.y1;
-	    info.width = composite_box.x2 - composite_box.x1;
-	    info.height = composite_box.y2 - composite_box.y1;
-
-	    func (implementation, &info);
-
-	    pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
-	}
-    }
-
-out:
-    if (white_img)
-	pixman_image_unref (white_img);
-}
-
-/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an
- * infinitely big mask image at the position such that the glyph origin point
- * is positioned at the (glyphs[i].x, glyphs[i].y) point.
- *
- * Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source
- * image are both aligned with (dest_x, dest_y) in the destination image. Then
- * these three images are composited within the 
- *
- *       (dest_x, dest_y, dst_x + width, dst_y + height)
- *
- * rectangle.
- *
- * TODO:
- *   - Trim the mask to the destination clip/image?
- *   - Trim composite region based on sources, when the op ignores 0s.
- */
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-PIXMAN_EXPORT void
-pixman_composite_glyphs (pixman_op_t            op,
-			 pixman_image_t        *src,
-			 pixman_image_t        *dest,
-			 pixman_format_code_t   mask_format,
-			 int32_t                src_x,
-			 int32_t                src_y,
-			 int32_t		mask_x,
-			 int32_t		mask_y,
-			 int32_t                dest_x,
-			 int32_t                dest_y,
-			 int32_t                width,
-			 int32_t                height,
-			 pixman_glyph_cache_t  *cache,
-			 int			n_glyphs,
-			 const pixman_glyph_t  *glyphs)
-{
-    pixman_image_t *mask;
-
-    if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1)))
-	return;
-
-    if (PIXMAN_FORMAT_A   (mask_format) != 0 &&
-	PIXMAN_FORMAT_RGB (mask_format) != 0)
-    {
-	pixman_image_set_component_alpha (mask, TRUE);
-    }
-
-    add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs);
-
-    pixman_image_composite32 (op, src, mask, dest,
-			      src_x, src_y,
-			      0, 0,
-			      dest_x, dest_y,
-			      width, height);
-
-    pixman_image_unref (mask);
-}
diff --git a/vendor/pixman/pixman/pixman-gradient-walker.c b/vendor/pixman/pixman/pixman-gradient-walker.c
deleted file mode 100644
index b31d5ad7a..000000000
--- a/vendor/pixman/pixman/pixman-gradient-walker.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- *
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-
-void
-_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
-                              gradient_t *              gradient,
-                              pixman_repeat_t		repeat)
-{
-    walker->num_stops = gradient->n_stops;
-    walker->stops     = gradient->stops;
-    walker->left_x    = 0;
-    walker->right_x   = 0x10000;
-    walker->a_s       = 0.0f;
-    walker->a_b       = 0.0f;
-    walker->r_s       = 0.0f;
-    walker->r_b       = 0.0f;
-    walker->g_s       = 0.0f;
-    walker->g_b       = 0.0f;
-    walker->b_s       = 0.0f;
-    walker->b_b       = 0.0f;
-    walker->repeat    = repeat;
-
-    walker->need_reset = TRUE;
-}
-
-static void
-gradient_walker_reset (pixman_gradient_walker_t *walker,
-		       pixman_fixed_48_16_t      pos)
-{
-    int64_t x, left_x, right_x;
-    pixman_color_t *left_c, *right_c;
-    int n, count = walker->num_stops;
-    pixman_gradient_stop_t *stops = walker->stops;
-    float la, lr, lg, lb;
-    float ra, rr, rg, rb;
-    float lx, rx;
-
-    if (walker->repeat == PIXMAN_REPEAT_NORMAL)
-    {
-	x = (int32_t)pos & 0xffff;
-    }
-    else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
-    {
-	x = (int32_t)pos & 0xffff;
-	if ((int32_t)pos & 0x10000)
-	    x = 0x10000 - x;
-    }
-    else
-    {
-	x = pos;
-    }
-    
-    for (n = 0; n < count; n++)
-    {
-	if (x < stops[n].x)
-	    break;
-    }
-    
-    left_x =  stops[n - 1].x;
-    left_c = &stops[n - 1].color;
-    
-    right_x =  stops[n].x;
-    right_c = &stops[n].color;
-
-    if (walker->repeat == PIXMAN_REPEAT_NORMAL)
-    {
-	left_x  += (pos - x);
-	right_x += (pos - x);
-    }
-    else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
-    {
-	if ((int32_t)pos & 0x10000)
-	{
-	    pixman_color_t  *tmp_c;
-	    int32_t tmp_x;
-
-	    tmp_x   = 0x10000 - right_x;
-	    right_x = 0x10000 - left_x;
-	    left_x  = tmp_x;
-
-	    tmp_c   = right_c;
-	    right_c = left_c;
-	    left_c  = tmp_c;
-
-	    x = 0x10000 - x;
-	}
-	left_x  += (pos - x);
-	right_x += (pos - x);
-    }
-    else if (walker->repeat == PIXMAN_REPEAT_NONE)
-    {
-	if (n == 0)
-	    right_c = left_c;
-	else if (n == count)
-	    left_c = right_c;
-    }
-
-    /* The alpha/red/green/blue channels are scaled to be in [0, 1].
-     * This ensures that after premultiplication all channels will
-     * be in the [0, 1] interval.
-     */
-    la = (left_c->alpha * (1.0f/257.0f));
-    lr = (left_c->red * (1.0f/257.0f));
-    lg = (left_c->green * (1.0f/257.0f));
-    lb = (left_c->blue * (1.0f/257.0f));
-
-    ra = (right_c->alpha * (1.0f/257.0f));
-    rr = (right_c->red * (1.0f/257.0f));
-    rg = (right_c->green * (1.0f/257.0f));
-    rb = (right_c->blue * (1.0f/257.0f));
-    
-    lx = left_x * (1.0f/65536.0f);
-    rx = right_x * (1.0f/65536.0f);
-    
-    if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
-    {
-	walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
-	walker->a_b = (la + ra) / 510.0f;
-	walker->r_b = (lr + rr) / 510.0f;
-	walker->g_b = (lg + rg) / 510.0f;
-	walker->b_b = (lb + rb) / 510.0f;
-    }
-    else
-    {
-	float w_rec = 1.0f / (rx - lx);
-
-	walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f);
-	walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
-	walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
-	walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
-
-	walker->a_s = (ra - la) * w_rec * (1.0f/255.0f);
-	walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
-	walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
-	walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
-    }
-   
-    walker->left_x = left_x;
-    walker->right_x = right_x;
-
-    walker->need_reset = FALSE;
-}
-
-static argb_t
-pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker,
-				    pixman_fixed_48_16_t      x)
-{
-    argb_t f;
-    float y;
-
-    if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
-	gradient_walker_reset (walker, x);
-
-    y = x * (1.0f / 65536.0f);
-
-    f.a = walker->a_s * y + walker->a_b;
-    f.r = f.a * (walker->r_s * y + walker->r_b);
-    f.g = f.a * (walker->g_s * y + walker->g_b);
-    f.b = f.a * (walker->b_s * y + walker->b_b);
-
-    return f;
-}
-
-static uint32_t
-pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker,
-				 pixman_fixed_48_16_t      x)
-{
-    argb_t f;
-    float y;
-
-    if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
-	gradient_walker_reset (walker, x);
-
-    y = x * (1.0f / 65536.0f);
-
-    /* Instead of [0...1] for ARGB, we want [0...255],
-     * multiply alpha with 255 and the color channels
-     * also get multiplied by the alpha multiplier.
-     *
-     * We don't use pixman_contract_from_float because it causes a 2x
-     * slowdown to do so, and the values are already normalized,
-     * so we don't have to worry about values < 0.f or > 1.f
-     */
-    f.a = 255.f * (walker->a_s * y + walker->a_b);
-    f.r = f.a * (walker->r_s * y + walker->r_b);
-    f.g = f.a * (walker->g_s * y + walker->g_b);
-    f.b = f.a * (walker->b_s * y + walker->b_b);
-
-    return (((uint32_t)(f.a + .5f) << 24) & 0xff000000) |
-           (((uint32_t)(f.r + .5f) << 16) & 0x00ff0000) |
-           (((uint32_t)(f.g + .5f) <<  8) & 0x0000ff00) |
-           (((uint32_t)(f.b + .5f) >>  0) & 0x000000ff);
-}
-
-void
-_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker,
-				      pixman_fixed_48_16_t      x,
-				      uint32_t                 *buffer)
-{
-    *buffer = pixman_gradient_walker_pixel_32 (walker, x);
-}
-
-void
-_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker,
-				    pixman_fixed_48_16_t      x,
-				    uint32_t                 *buffer)
-{
-    *(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x);
-}
-
-void
-_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker,
-				     pixman_fixed_48_16_t      x,
-				     uint32_t                 *buffer,
-				     uint32_t                 *end)
-{
-    register uint32_t color;
-
-    color = pixman_gradient_walker_pixel_32 (walker, x);
-    while (buffer < end)
-	*buffer++ = color;
-}
-
-void
-_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker,
-				   pixman_fixed_48_16_t      x,
-				   uint32_t                 *buffer,
-				   uint32_t                 *end)
-{
-    register argb_t color;
-    argb_t *buffer_wide = (argb_t *)buffer;
-    argb_t *end_wide    = (argb_t *)end;
-
-    color = pixman_gradient_walker_pixel_float (walker, x);
-    while (buffer_wide < end_wide)
-	*buffer_wide++ = color;
-}
diff --git a/vendor/pixman/pixman/pixman-image.c b/vendor/pixman/pixman/pixman-image.c
deleted file mode 100644
index 72796fc9c..000000000
--- a/vendor/pixman/pixman/pixman-image.c
+++ /dev/null
@@ -1,994 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pixman-private.h"
-
-static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
-
-static void
-gradient_property_changed (pixman_image_t *image)
-{
-    gradient_t *gradient = &image->gradient;
-    int n = gradient->n_stops;
-    pixman_gradient_stop_t *stops = gradient->stops;
-    pixman_gradient_stop_t *begin = &(gradient->stops[-1]);
-    pixman_gradient_stop_t *end = &(gradient->stops[n]);
-
-    switch (gradient->common.repeat)
-    {
-    default:
-    case PIXMAN_REPEAT_NONE:
-	begin->x = INT32_MIN;
-	begin->color = transparent_black;
-	end->x = INT32_MAX;
-	end->color = transparent_black;
-	break;
-
-    case PIXMAN_REPEAT_NORMAL:
-	begin->x = stops[n - 1].x - pixman_fixed_1;
-	begin->color = stops[n - 1].color;
-	end->x = stops[0].x + pixman_fixed_1;
-	end->color = stops[0].color;
-	break;
-
-    case PIXMAN_REPEAT_REFLECT:
-	begin->x = - stops[0].x;
-	begin->color = stops[0].color;
-	end->x = pixman_int_to_fixed (2) - stops[n - 1].x;
-	end->color = stops[n - 1].color;
-	break;
-
-    case PIXMAN_REPEAT_PAD:
-	begin->x = INT32_MIN;
-	begin->color = stops[0].color;
-	end->x = INT32_MAX;
-	end->color = stops[n - 1].color;
-	break;
-    }
-}
-
-pixman_bool_t
-_pixman_init_gradient (gradient_t *                  gradient,
-                       const pixman_gradient_stop_t *stops,
-                       int                           n_stops)
-{
-    return_val_if_fail (n_stops > 0, FALSE);
-
-    /* We allocate two extra stops, one before the beginning of the stop list,
-     * and one after the end. These stops are initialized to whatever color
-     * would be used for positions outside the range of the stop list.
-     *
-     * This saves a bit of computation in the gradient walker.
-     *
-     * The pointer we store in the gradient_t struct still points to the
-     * first user-supplied struct, so when freeing, we will have to
-     * subtract one.
-     */
-    gradient->stops =
-	pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t));
-    if (!gradient->stops)
-	return FALSE;
-
-    gradient->stops += 1;
-    memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
-    gradient->n_stops = n_stops;
-
-    gradient->common.property_changed = gradient_property_changed;
-
-    return TRUE;
-}
-
-void
-_pixman_image_init (pixman_image_t *image)
-{
-    image_common_t *common = &image->common;
-
-    pixman_region32_init (&common->clip_region);
-
-    common->alpha_count = 0;
-    common->have_clip_region = FALSE;
-    common->clip_sources = FALSE;
-    common->transform = NULL;
-    common->repeat = PIXMAN_REPEAT_NONE;
-    common->filter = PIXMAN_FILTER_NEAREST;
-    common->filter_params = NULL;
-    common->n_filter_params = 0;
-    common->alpha_map = NULL;
-    common->component_alpha = FALSE;
-    common->ref_count = 1;
-    common->property_changed = NULL;
-    common->client_clip = FALSE;
-    common->destroy_func = NULL;
-    common->destroy_data = NULL;
-    common->dirty = TRUE;
-}
-
-pixman_bool_t
-_pixman_image_fini (pixman_image_t *image)
-{
-    image_common_t *common = (image_common_t *)image;
-
-    common->ref_count--;
-
-    if (common->ref_count == 0)
-    {
-	if (image->common.destroy_func)
-	    image->common.destroy_func (image, image->common.destroy_data);
-
-	pixman_region32_fini (&common->clip_region);
-
-	free (common->transform);
-	free (common->filter_params);
-
-	if (common->alpha_map)
-	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
-
-	if (image->type == LINEAR ||
-	    image->type == RADIAL ||
-	    image->type == CONICAL)
-	{
-	    if (image->gradient.stops)
-	    {
-		/* See _pixman_init_gradient() for an explanation of the - 1 */
-		free (image->gradient.stops - 1);
-	    }
-
-	    /* This will trigger if someone adds a property_changed
-	     * method to the linear/radial/conical gradient overwriting
-	     * the general one.
-	     */
-	    assert (
-		image->common.property_changed == gradient_property_changed);
-	}
-
-	if (image->type == BITS && image->bits.free_me)
-	    free (image->bits.free_me);
-
-	return TRUE;
-    }
-
-    return FALSE;
-}
-
-pixman_image_t *
-_pixman_image_allocate (void)
-{
-    pixman_image_t *image = malloc (sizeof (pixman_image_t));
-
-    if (image)
-	_pixman_image_init (image);
-
-    return image;
-}
-
-static void
-image_property_changed (pixman_image_t *image)
-{
-    image->common.dirty = TRUE;
-}
-
-/* Ref Counting */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_ref (pixman_image_t *image)
-{
-    image->common.ref_count++;
-
-    return image;
-}
-
-/* returns TRUE when the image is freed */
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_unref (pixman_image_t *image)
-{
-    if (_pixman_image_fini (image))
-    {
-	free (image);
-	return TRUE;
-    }
-
-    return FALSE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_destroy_function (pixman_image_t *            image,
-                                   pixman_image_destroy_func_t func,
-                                   void *                      data)
-{
-    image->common.destroy_func = func;
-    image->common.destroy_data = data;
-}
-
-PIXMAN_EXPORT void *
-pixman_image_get_destroy_data (pixman_image_t *image)
-{
-  return image->common.destroy_data;
-}
-
-void
-_pixman_image_reset_clip_region (pixman_image_t *image)
-{
-    image->common.have_clip_region = FALSE;
-}
-
-/* Executive Summary: This function is a no-op that only exists
- * for historical reasons.
- *
- * There used to be a bug in the X server where it would rely on
- * out-of-bounds accesses when it was asked to composite with a
- * window as the source. It would create a pixman image pointing
- * to some bogus position in memory, but then set a clip region
- * to the position where the actual bits were.
- *
- * Due to a bug in old versions of pixman, where it would not clip
- * against the image bounds when a clip region was set, this would
- * actually work. So when the pixman bug was fixed, a workaround was
- * added to allow certain out-of-bound accesses. This function disabled
- * those workarounds.
- *
- * Since 0.21.2, pixman doesn't do these workarounds anymore, so now
- * this function is a no-op.
- */
-PIXMAN_EXPORT void
-pixman_disable_out_of_bounds_workaround (void)
-{
-}
-
-static void
-compute_image_info (pixman_image_t *image)
-{
-    pixman_format_code_t code;
-    uint32_t flags = 0;
-
-    /* Transform */
-    if (!image->common.transform)
-    {
-	flags |= (FAST_PATH_ID_TRANSFORM	|
-		  FAST_PATH_X_UNIT_POSITIVE	|
-		  FAST_PATH_Y_UNIT_ZERO		|
-		  FAST_PATH_AFFINE_TRANSFORM);
-    }
-    else
-    {
-	flags |= FAST_PATH_HAS_TRANSFORM;
-
-	if (image->common.transform->matrix[2][0] == 0			&&
-	    image->common.transform->matrix[2][1] == 0			&&
-	    image->common.transform->matrix[2][2] == pixman_fixed_1)
-	{
-	    flags |= FAST_PATH_AFFINE_TRANSFORM;
-
-	    if (image->common.transform->matrix[0][1] == 0 &&
-		image->common.transform->matrix[1][0] == 0)
-	    {
-		if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
-		    image->common.transform->matrix[1][1] == -pixman_fixed_1)
-		{
-		    flags |= FAST_PATH_ROTATE_180_TRANSFORM;
-		}
-		flags |= FAST_PATH_SCALE_TRANSFORM;
-	    }
-	    else if (image->common.transform->matrix[0][0] == 0 &&
-	             image->common.transform->matrix[1][1] == 0)
-	    {
-		pixman_fixed_t m01 = image->common.transform->matrix[0][1];
-		pixman_fixed_t m10 = image->common.transform->matrix[1][0];
-
-		if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_90_TRANSFORM;
-		else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1)
-		    flags |= FAST_PATH_ROTATE_270_TRANSFORM;
-	    }
-	}
-
-	if (image->common.transform->matrix[0][0] > 0)
-	    flags |= FAST_PATH_X_UNIT_POSITIVE;
-
-	if (image->common.transform->matrix[1][0] == 0)
-	    flags |= FAST_PATH_Y_UNIT_ZERO;
-    }
-
-    /* Filter */
-    switch (image->common.filter)
-    {
-    case PIXMAN_FILTER_NEAREST:
-    case PIXMAN_FILTER_FAST:
-	flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-	break;
-
-    case PIXMAN_FILTER_BILINEAR:
-    case PIXMAN_FILTER_GOOD:
-    case PIXMAN_FILTER_BEST:
-	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-
-	/* Here we have a chance to optimize BILINEAR filter to NEAREST if
-	 * they are equivalent for the currently used transformation matrix.
-	 */
-	if (flags & FAST_PATH_ID_TRANSFORM)
-	{
-	    flags |= FAST_PATH_NEAREST_FILTER;
-	}
-	else if (flags & FAST_PATH_AFFINE_TRANSFORM)
-	{
-	    /* Suppose the transform is
-	     *
-	     *    [ t00, t01, t02 ]
-	     *    [ t10, t11, t12 ]
-	     *    [   0,   0,   1 ]
-	     *
-	     * and the destination coordinates are (n + 0.5, m + 0.5). Then
-	     * the transformed x coordinate is:
-	     *
-	     *     tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02
-	     *        = t00 * n + t01 * m + t02 + (t00 + t01) * 0.5
-	     *
-	     * which implies that if t00, t01 and t02 are all integers
-	     * and (t00 + t01) is odd, then tx will be an integer plus 0.5,
-	     * which means a BILINEAR filter will reduce to NEAREST. The same
-	     * applies in the y direction
-	     */
-	    pixman_fixed_t (*t)[3] = image->common.transform->matrix;
-
-	    if ((pixman_fixed_frac (
-		     t[0][0] | t[0][1] | t[0][2] |
-		     t[1][0] | t[1][1] | t[1][2]) == 0)			&&
-		(pixman_fixed_to_int (
-		    (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1)
-	    {
-		/* FIXME: there are some affine-test failures, showing that
-		 * handling of BILINEAR and NEAREST filter is not quite
-		 * equivalent when getting close to 32K for the translation
-		 * components of the matrix. That's likely some bug, but for
-		 * now just skip BILINEAR->NEAREST optimization in this case.
-		 */
-		pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
-		if (image->common.transform->matrix[0][2] <= magic_limit  &&
-		    image->common.transform->matrix[1][2] <= magic_limit  &&
-		    image->common.transform->matrix[0][2] >= -magic_limit &&
-		    image->common.transform->matrix[1][2] >= -magic_limit)
-		{
-		    flags |= FAST_PATH_NEAREST_FILTER;
-		}
-	    }
-	}
-	break;
-
-    case PIXMAN_FILTER_CONVOLUTION:
-	break;
-
-    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-	flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
-	break;
-
-    default:
-	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
-	break;
-    }
-
-    /* Repeat mode */
-    switch (image->common.repeat)
-    {
-    case PIXMAN_REPEAT_NONE:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    case PIXMAN_REPEAT_REFLECT:
-	flags |=
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    case PIXMAN_REPEAT_PAD:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    default:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT;
-	break;
-    }
-
-    /* Component alpha */
-    if (image->common.component_alpha)
-	flags |= FAST_PATH_COMPONENT_ALPHA;
-    else
-	flags |= FAST_PATH_UNIFIED_ALPHA;
-
-    flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
-
-    /* Type specific checks */
-    switch (image->type)
-    {
-    case SOLID:
-	code = PIXMAN_solid;
-
-	if (image->solid.color.alpha == 0xffff)
-	    flags |= FAST_PATH_IS_OPAQUE;
-	break;
-
-    case BITS:
-	if (image->bits.width == 1	&&
-	    image->bits.height == 1	&&
-	    image->common.repeat != PIXMAN_REPEAT_NONE)
-	{
-	    code = PIXMAN_solid;
-	}
-	else
-	{
-	    code = image->bits.format;
-	    flags |= FAST_PATH_BITS_IMAGE;
-	}
-
-	if (!PIXMAN_FORMAT_A (image->bits.format)				&&
-	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY		&&
-	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
-	{
-	    flags |= FAST_PATH_SAMPLES_OPAQUE;
-
-	    if (image->common.repeat != PIXMAN_REPEAT_NONE)
-		flags |= FAST_PATH_IS_OPAQUE;
-	}
-
-	if (image->bits.read_func || image->bits.write_func)
-	    flags &= ~FAST_PATH_NO_ACCESSORS;
-
-	if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
-	    flags &= ~FAST_PATH_NARROW_FORMAT;
-	break;
-
-    case RADIAL:
-	code = PIXMAN_unknown;
-
-	/*
-	 * As explained in pixman-radial-gradient.c, every point of
-	 * the plane has a valid associated radius (and thus will be
-	 * colored) if and only if a is negative (i.e. one of the two
-	 * circles contains the other one).
-	 */
-
-        if (image->radial.a >= 0)
-	    break;
-
-	/* Fall through */
-
-    case CONICAL:
-    case LINEAR:
-	code = PIXMAN_unknown;
-
-	if (image->common.repeat != PIXMAN_REPEAT_NONE)
-	{
-	    int i;
-
-	    flags |= FAST_PATH_IS_OPAQUE;
-	    for (i = 0; i < image->gradient.n_stops; ++i)
-	    {
-		if (image->gradient.stops[i].color.alpha != 0xffff)
-		{
-		    flags &= ~FAST_PATH_IS_OPAQUE;
-		    break;
-		}
-	    }
-	}
-	break;
-
-    default:
-	code = PIXMAN_unknown;
-	break;
-    }
-
-    /* Alpha maps are only supported for BITS images, so it's always
-     * safe to ignore their presense for non-BITS images
-     */
-    if (!image->common.alpha_map || image->type != BITS)
-    {
-	flags |= FAST_PATH_NO_ALPHA_MAP;
-    }
-    else
-    {
-	if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
-	    flags &= ~FAST_PATH_NARROW_FORMAT;
-    }
-
-    /* Both alpha maps and convolution filters can introduce
-     * non-opaqueness in otherwise opaque images. Also
-     * an image with component alpha turned on is only opaque
-     * if all channels are opaque, so we simply turn it off
-     * unconditionally for those images.
-     */
-    if (image->common.alpha_map						||
-	image->common.filter == PIXMAN_FILTER_CONVOLUTION		||
-        image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION     ||
-	image->common.component_alpha)
-    {
-	flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
-    }
-
-    image->common.flags = flags;
-    image->common.extended_format_code = code;
-}
-
-void
-_pixman_image_validate (pixman_image_t *image)
-{
-    if (image->common.dirty)
-    {
-	compute_image_info (image);
-
-	/* It is important that property_changed is
-	 * called *after* compute_image_info() because
-	 * property_changed() can make use of the flags
-	 * to set up accessors etc.
-	 */
-	if (image->common.property_changed)
-	    image->common.property_changed (image);
-
-	image->common.dirty = FALSE;
-    }
-
-    if (image->common.alpha_map)
-	_pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region32 (pixman_image_t *   image,
-                                const pixman_region32_t *region)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (region)
-    {
-	if ((result = pixman_region32_copy (&common->clip_region, region)))
-	    image->common.have_clip_region = TRUE;
-    }
-    else
-    {
-	_pixman_image_reset_clip_region (image);
-
-	result = TRUE;
-    }
-
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region (pixman_image_t *   image,
-                              const pixman_region16_t *region)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (region)
-    {
-	if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region)))
-	    image->common.have_clip_region = TRUE;
-    }
-    else
-    {
-	_pixman_image_reset_clip_region (image);
-
-	result = TRUE;
-    }
-
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_has_client_clip (pixman_image_t *image,
-                                  pixman_bool_t   client_clip)
-{
-    image->common.client_clip = client_clip;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_transform (pixman_image_t *          image,
-                            const pixman_transform_t *transform)
-{
-    static const pixman_transform_t id =
-    {
-	{ { pixman_fixed_1, 0, 0 },
-	  { 0, pixman_fixed_1, 0 },
-	  { 0, 0, pixman_fixed_1 } }
-    };
-
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (common->transform == transform)
-	return TRUE;
-
-    if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
-    {
-	free (common->transform);
-	common->transform = NULL;
-	result = TRUE;
-
-	goto out;
-    }
-
-    if (common->transform &&
-	memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
-    {
-	return TRUE;
-    }
-
-    if (common->transform == NULL)
-	common->transform = malloc (sizeof (pixman_transform_t));
-
-    if (common->transform == NULL)
-    {
-	result = FALSE;
-
-	goto out;
-    }
-
-    memcpy (common->transform, transform, sizeof(pixman_transform_t));
-
-    result = TRUE;
-
-out:
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_repeat (pixman_image_t *image,
-                         pixman_repeat_t repeat)
-{
-    if (image->common.repeat == repeat)
-	return;
-
-    image->common.repeat = repeat;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_dither (pixman_image_t *image,
-			 pixman_dither_t dither)
-{
-    if (image->type == BITS)
-    {
-	if (image->bits.dither == dither)
-	    return;
-
-	image->bits.dither = dither;
-
-	image_property_changed (image);
-    }
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_dither_offset (pixman_image_t *image,
-				int             offset_x,
-				int             offset_y)
-{
-    if (image->type == BITS)
-    {
-	if (image->bits.dither_offset_x == offset_x &&
-	    image->bits.dither_offset_y == offset_y)
-	{
-	    return;
-	}
-
-	image->bits.dither_offset_x = offset_x;
-	image->bits.dither_offset_y = offset_y;
-
-	image_property_changed (image);
-    }
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_filter (pixman_image_t *      image,
-                         pixman_filter_t       filter,
-                         const pixman_fixed_t *params,
-                         int                   n_params)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_fixed_t *new_params;
-
-    if (params == common->filter_params && filter == common->filter)
-	return TRUE;
-
-    if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION)
-    {
-	int width = pixman_fixed_to_int (params[0]);
-	int height = pixman_fixed_to_int (params[1]);
-	int x_phase_bits = pixman_fixed_to_int (params[2]);
-	int y_phase_bits = pixman_fixed_to_int (params[3]);
-	int n_x_phases = (1 << x_phase_bits);
-	int n_y_phases = (1 << y_phase_bits);
-
-	return_val_if_fail (
-	    n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE);
-    }
-    
-    new_params = NULL;
-    if (params)
-    {
-	new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
-	if (!new_params)
-	    return FALSE;
-
-	memcpy (new_params,
-	        params, n_params * sizeof (pixman_fixed_t));
-    }
-
-    common->filter = filter;
-
-    if (common->filter_params)
-	free (common->filter_params);
-
-    common->filter_params = new_params;
-    common->n_filter_params = n_params;
-
-    image_property_changed (image);
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_source_clipping (pixman_image_t *image,
-                                  pixman_bool_t   clip_sources)
-{
-    if (image->common.clip_sources == clip_sources)
-	return;
-
-    image->common.clip_sources = clip_sources;
-
-    image_property_changed (image);
-}
-
-/* Unlike all the other property setters, this function does not
- * copy the content of indexed. Doing this copying is simply
- * way, way too expensive.
- */
-PIXMAN_EXPORT void
-pixman_image_set_indexed (pixman_image_t *        image,
-                          const pixman_indexed_t *indexed)
-{
-    bits_image_t *bits = (bits_image_t *)image;
-
-    if (bits->indexed == indexed)
-	return;
-
-    bits->indexed = indexed;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_alpha_map (pixman_image_t *image,
-                            pixman_image_t *alpha_map,
-                            int16_t         x,
-                            int16_t         y)
-{
-    image_common_t *common = (image_common_t *)image;
-
-    return_if_fail (!alpha_map || alpha_map->type == BITS);
-
-    if (alpha_map && common->alpha_count > 0)
-    {
-	/* If this image is being used as an alpha map itself,
-	 * then you can't give it an alpha map of its own.
-	 */
-	return;
-    }
-
-    if (alpha_map && alpha_map->common.alpha_map)
-    {
-	/* If the image has an alpha map of its own,
-	 * then it can't be used as an alpha map itself
-	 */
-	return;
-    }
-
-    if (common->alpha_map != (bits_image_t *)alpha_map)
-    {
-	if (common->alpha_map)
-	{
-	    common->alpha_map->common.alpha_count--;
-
-	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
-	}
-
-	if (alpha_map)
-	{
-	    common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map);
-
-	    common->alpha_map->common.alpha_count++;
-	}
-	else
-	{
-	    common->alpha_map = NULL;
-	}
-    }
-
-    common->alpha_origin_x = x;
-    common->alpha_origin_y = y;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_component_alpha   (pixman_image_t *image,
-                                    pixman_bool_t   component_alpha)
-{
-    if (image->common.component_alpha == component_alpha)
-	return;
-
-    image->common.component_alpha = component_alpha;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_get_component_alpha   (pixman_image_t       *image)
-{
-    return image->common.component_alpha;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_accessors (pixman_image_t *           image,
-                            pixman_read_memory_func_t  read_func,
-                            pixman_write_memory_func_t write_func)
-{
-    return_if_fail (image != NULL);
-
-    if (image->type == BITS)
-    {
-	/* Accessors only work for <= 32 bpp. */
-	if (PIXMAN_FORMAT_BPP(image->bits.format) > 32)
-	    return_if_fail (!read_func && !write_func);
-
-	image->bits.read_func = read_func;
-	image->bits.write_func = write_func;
-
-	image_property_changed (image);
-    }
-}
-
-PIXMAN_EXPORT uint32_t *
-pixman_image_get_data (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.bits;
-
-    return NULL;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_width (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.width;
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_height (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.height;
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_stride (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.rowstride * (int) sizeof (uint32_t);
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_depth (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return PIXMAN_FORMAT_DEPTH (image->bits.format);
-
-    return 0;
-}
-
-PIXMAN_EXPORT pixman_format_code_t
-pixman_image_get_format (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.format;
-
-    return PIXMAN_null;
-}
-
-uint32_t
-_pixman_image_get_solid (pixman_implementation_t *imp,
-			 pixman_image_t *         image,
-                         pixman_format_code_t     format)
-{
-    uint32_t result;
-
-    if (image->type == SOLID)
-    {
-	result = image->solid.color_32;
-    }
-    else if (image->type == BITS)
-    {
-	if (image->bits.format == PIXMAN_a8r8g8b8)
-	    result = image->bits.bits[0];
-	else if (image->bits.format == PIXMAN_x8r8g8b8)
-	    result = image->bits.bits[0] | 0xff000000;
-	else if (image->bits.format == PIXMAN_a8)
-	    result = (uint32_t)(*(uint8_t *)image->bits.bits) << 24;
-	else
-	    goto otherwise;
-    }
-    else
-    {
-	pixman_iter_t iter;
-
-    otherwise:
-	_pixman_implementation_iter_init (
-	    imp, &iter, image, 0, 0, 1, 1,
-	    (uint8_t *)&result,
-	    ITER_NARROW | ITER_SRC, image->common.flags);
-	
-	result = *iter.get_scanline (&iter, NULL);
-
-	if (iter.fini)
-	    iter.fini (&iter);
-    }
-
-    /* If necessary, convert RGB <--> BGR. */
-    if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB
-	&& PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB)
-    {
-	result = (((result & 0xff000000) >>  0) |
-	          ((result & 0x00ff0000) >> 16) |
-	          ((result & 0x0000ff00) >>  0) |
-	          ((result & 0x000000ff) << 16));
-    }
-
-    return result;
-}
diff --git a/vendor/pixman/pixman/pixman-implementation.c b/vendor/pixman/pixman/pixman-implementation.c
deleted file mode 100644
index 69fa70bc3..000000000
--- a/vendor/pixman/pixman/pixman-implementation.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * Copyright © 2009 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdlib.h>
-#include "pixman-private.h"
-
-pixman_implementation_t *
-_pixman_implementation_create (pixman_implementation_t *fallback,
-			       const pixman_fast_path_t *fast_paths)
-{
-    pixman_implementation_t *imp;
-
-    assert (fast_paths);
-
-    if ((imp = malloc (sizeof (pixman_implementation_t))))
-    {
-	pixman_implementation_t *d;
-
-	memset (imp, 0, sizeof *imp);
-
-	imp->fallback = fallback;
-	imp->fast_paths = fast_paths;
-	
-	/* Make sure the whole fallback chain has the right toplevel */
-	for (d = imp; d != NULL; d = d->fallback)
-	    d->toplevel = imp;
-    }
-
-    return imp;
-}
-
-#define N_CACHED_FAST_PATHS 8
-
-typedef struct
-{
-    struct
-    {
-	pixman_implementation_t *	imp;
-	pixman_fast_path_t		fast_path;
-    } cache [N_CACHED_FAST_PATHS];
-} cache_t;
-
-PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache)
-
-static void
-dummy_composite_rect (pixman_implementation_t *imp,
-		      pixman_composite_info_t *info)
-{
-}
-
-void
-_pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,
-					 pixman_op_t               op,
-					 pixman_format_code_t      src_format,
-					 uint32_t                  src_flags,
-					 pixman_format_code_t      mask_format,
-					 uint32_t                  mask_flags,
-					 pixman_format_code_t      dest_format,
-					 uint32_t                  dest_flags,
-					 pixman_implementation_t **out_imp,
-					 pixman_composite_func_t  *out_func)
-{
-    pixman_implementation_t *imp;
-    cache_t *cache;
-    int i;
-
-    /* Check cache for fast paths */
-    cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
-
-    for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
-    {
-	const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
-
-	/* Note that we check for equality here, not whether
-	 * the cached fast path matches. This is to prevent
-	 * us from selecting an overly general fast path
-	 * when a more specific one would work.
-	 */
-	if (info->op == op			&&
-	    info->src_format == src_format	&&
-	    info->mask_format == mask_format	&&
-	    info->dest_format == dest_format	&&
-	    info->src_flags == src_flags	&&
-	    info->mask_flags == mask_flags	&&
-	    info->dest_flags == dest_flags	&&
-	    info->func)
-	{
-	    *out_imp = cache->cache[i].imp;
-	    *out_func = cache->cache[i].fast_path.func;
-
-	    goto update_cache;
-	}
-    }
-
-    for (imp = toplevel; imp != NULL; imp = imp->fallback)
-    {
-	const pixman_fast_path_t *info = imp->fast_paths;
-
-	while (info->op != PIXMAN_OP_NONE)
-	{
-	    if ((info->op == op || info->op == PIXMAN_OP_any)		&&
-		/* Formats */
-		((info->src_format == src_format) ||
-		 (info->src_format == PIXMAN_any))			&&
-		((info->mask_format == mask_format) ||
-		 (info->mask_format == PIXMAN_any))			&&
-		((info->dest_format == dest_format) ||
-		 (info->dest_format == PIXMAN_any))			&&
-		/* Flags */
-		(info->src_flags & src_flags) == info->src_flags	&&
-		(info->mask_flags & mask_flags) == info->mask_flags	&&
-		(info->dest_flags & dest_flags) == info->dest_flags)
-	    {
-		*out_imp = imp;
-		*out_func = info->func;
-
-		/* Set i to the last spot in the cache so that the
-		 * move-to-front code below will work
-		 */
-		i = N_CACHED_FAST_PATHS - 1;
-
-		goto update_cache;
-	    }
-
-	    ++info;
-	}
-    }
-
-    /* We should never reach this point */
-    _pixman_log_error (
-        FUNC,
-        "No composite function found\n"
-        "\n"
-        "The most likely cause of this is that this system has issues with\n"
-        "thread local storage\n");
-
-    *out_imp = NULL;
-    *out_func = dummy_composite_rect;
-    return;
-
-update_cache:
-    if (i)
-    {
-	while (i--)
-	    cache->cache[i + 1] = cache->cache[i];
-
-	cache->cache[0].imp = *out_imp;
-	cache->cache[0].fast_path.op = op;
-	cache->cache[0].fast_path.src_format = src_format;
-	cache->cache[0].fast_path.src_flags = src_flags;
-	cache->cache[0].fast_path.mask_format = mask_format;
-	cache->cache[0].fast_path.mask_flags = mask_flags;
-	cache->cache[0].fast_path.dest_format = dest_format;
-	cache->cache[0].fast_path.dest_flags = dest_flags;
-	cache->cache[0].fast_path.func = *out_func;
-    }
-}
-
-static void
-dummy_combine (pixman_implementation_t *imp,
-	       pixman_op_t              op,
-	       uint32_t *               pd,
-	       const uint32_t *         ps,
-	       const uint32_t *         pm,
-	       int                      w)
-{
-}
-
-pixman_combine_32_func_t
-_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
-					pixman_op_t		 op,
-					pixman_bool_t		 component_alpha,
-					pixman_bool_t		 narrow)
-{
-    while (imp)
-    {
-	pixman_combine_32_func_t f = NULL;
-
-	switch ((narrow << 1) | component_alpha)
-	{
-	case 0: /* not narrow, not component alpha */
-	    f = (pixman_combine_32_func_t)imp->combine_float[op];
-	    break;
-	    
-	case 1: /* not narrow, component_alpha */
-	    f = (pixman_combine_32_func_t)imp->combine_float_ca[op];
-	    break;
-
-	case 2: /* narrow, not component alpha */
-	    f = imp->combine_32[op];
-	    break;
-
-	case 3: /* narrow, component_alpha */
-	    f = imp->combine_32_ca[op];
-	    break;
-	}
-
-	if (f)
-	    return f;
-
-	imp = imp->fallback;
-    }
-
-    /* We should never reach this point */
-    _pixman_log_error (FUNC, "No known combine function\n");
-    return dummy_combine;
-}
-
-pixman_bool_t
-_pixman_implementation_blt (pixman_implementation_t * imp,
-                            uint32_t *                src_bits,
-                            uint32_t *                dst_bits,
-                            int                       src_stride,
-                            int                       dst_stride,
-                            int                       src_bpp,
-                            int                       dst_bpp,
-                            int                       src_x,
-                            int                       src_y,
-                            int                       dest_x,
-                            int                       dest_y,
-                            int                       width,
-                            int                       height)
-{
-    while (imp)
-    {
-	if (imp->blt &&
-	    (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
-			 src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y,
-			 width, height))
-	{
-	    return TRUE;
-	}
-
-	imp = imp->fallback;
-    }
-
-    return FALSE;
-}
-
-pixman_bool_t
-_pixman_implementation_fill (pixman_implementation_t *imp,
-                             uint32_t *               bits,
-                             int                      stride,
-                             int                      bpp,
-                             int                      x,
-                             int                      y,
-                             int                      width,
-                             int                      height,
-                             uint32_t                 filler)
-{
-    while (imp)
-    {
-	if (imp->fill &&
-	    ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler)))
-	{
-	    return TRUE;
-	}
-
-	imp = imp->fallback;
-    }
-
-    return FALSE;
-}
-
-static uint32_t *
-get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return NULL;
-}
-
-void
-_pixman_implementation_iter_init (pixman_implementation_t *imp,
-                                  pixman_iter_t           *iter,
-                                  pixman_image_t          *image,
-                                  int                      x,
-                                  int                      y,
-                                  int                      width,
-                                  int                      height,
-                                  uint8_t                 *buffer,
-                                  iter_flags_t             iter_flags,
-                                  uint32_t                 image_flags)
-{
-    pixman_format_code_t format;
-
-    iter->image = image;
-    iter->buffer = (uint32_t *)buffer;
-    iter->x = x;
-    iter->y = y;
-    iter->width = width;
-    iter->height = height;
-    iter->iter_flags = iter_flags;
-    iter->image_flags = image_flags;
-    iter->fini = NULL;
-
-    if (!iter->image)
-    {
-	iter->get_scanline = get_scanline_null;
-	return;
-    }
-
-    format = iter->image->common.extended_format_code;
-
-    while (imp)
-    {
-        if (imp->iter_info)
-        {
-            const pixman_iter_info_t *info;
-
-            for (info = imp->iter_info; info->format != PIXMAN_null; ++info)
-            {
-                if ((info->format == PIXMAN_any || info->format == format) &&
-                    (info->image_flags & image_flags) == info->image_flags &&
-                    (info->iter_flags & iter_flags) == info->iter_flags)
-                {
-                    iter->get_scanline = info->get_scanline;
-                    iter->write_back = info->write_back;
-
-                    if (info->initializer)
-                        info->initializer (iter, info);
-                    return;
-                }
-            }
-        }
-
-        imp = imp->fallback;
-    }
-}
-
-pixman_bool_t
-_pixman_disabled (const char *name)
-{
-    const char *env;
-
-    if ((env = getenv ("PIXMAN_DISABLE")))
-    {
-	do
-	{
-	    const char *end;
-	    int len;
-
-	    if ((end = strchr (env, ' ')))
-		len = end - env;
-	    else
-		len = strlen (env);
-
-	    if (strlen (name) == len && strncmp (name, env, len) == 0)
-	    {
-		printf ("pixman: Disabled %s implementation\n", name);
-		return TRUE;
-	    }
-
-	    env += len;
-	}
-	while (*env++);
-    }
-
-    return FALSE;
-}
-
-static const pixman_fast_path_t empty_fast_path[] =
-{
-    { PIXMAN_OP_NONE }
-};
-
-pixman_implementation_t *
-_pixman_choose_implementation (void)
-{
-    pixman_implementation_t *imp;
-
-    imp = _pixman_implementation_create_general();
-
-    if (!_pixman_disabled ("fast"))
-	imp = _pixman_implementation_create_fast_path (imp);
-
-    imp = _pixman_x86_get_implementations (imp);
-    imp = _pixman_arm_get_implementations (imp);
-    imp = _pixman_ppc_get_implementations (imp);
-    imp = _pixman_mips_get_implementations (imp);
-
-    imp = _pixman_implementation_create_noop (imp);
-
-    if (_pixman_disabled ("wholeops"))
-    {
-        pixman_implementation_t *cur;
-
-        /* Disable all whole-operation paths except the general one,
-         * so that optimized iterators are used as much as possible.
-         */
-        for (cur = imp; cur->fallback; cur = cur->fallback)
-            cur->fast_paths = empty_fast_path;
-    }
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-inlines.h b/vendor/pixman/pixman/pixman-inlines.h
deleted file mode 100644
index f785910f8..000000000
--- a/vendor/pixman/pixman/pixman-inlines.h
+++ /dev/null
@@ -1,1365 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author:  Keith Packard, SuSE, Inc.
- */
-
-#ifndef PIXMAN_FAST_PATH_H__
-#define PIXMAN_FAST_PATH_H__
-
-#include "pixman-private.h"
-
-#define PIXMAN_REPEAT_COVER -1
-
-/* Flags describing input parameters to fast path macro template.
- * Turning on some flag values may indicate that
- * "some property X is available so template can use this" or
- * "some property X should be handled by template".
- *
- * FLAG_HAVE_SOLID_MASK
- *  Input mask is solid so template should handle this.
- *
- * FLAG_HAVE_NON_SOLID_MASK
- *  Input mask is bits mask so template should handle this.
- *
- * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
- * exclusive. (It's not allowed to turn both flags on)
- */
-#define FLAG_NONE				(0)
-#define FLAG_HAVE_SOLID_MASK			(1 <<   1)
-#define FLAG_HAVE_NON_SOLID_MASK		(1 <<   2)
-
-/* To avoid too short repeated scanline function calls, extend source
- * scanlines having width less than below constant value.
- */
-#define REPEAT_NORMAL_MIN_WIDTH			64
-
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
-    if (repeat == PIXMAN_REPEAT_NONE)
-    {
-	if (*c < 0 || *c >= size)
-	    return FALSE;
-    }
-    else if (repeat == PIXMAN_REPEAT_NORMAL)
-    {
-	while (*c >= size)
-	    *c -= size;
-	while (*c < 0)
-	    *c += size;
-    }
-    else if (repeat == PIXMAN_REPEAT_PAD)
-    {
-	*c = CLIP (*c, 0, size - 1);
-    }
-    else /* REFLECT */
-    {
-	*c = MOD (*c, size * 2);
-	if (*c >= size)
-	    *c = size * 2 - *c - 1;
-    }
-    return TRUE;
-}
-
-static force_inline int
-pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
-{
-    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
-	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
-}
-
-#if BILINEAR_INTERPOLATION_BITS <= 4
-/* Inspired by Filter_32_opaque from Skia */
-static force_inline uint32_t
-bilinear_interpolation (uint32_t tl, uint32_t tr,
-			uint32_t bl, uint32_t br,
-			int distx, int disty)
-{
-    int distxy, distxiy, distixy, distixiy;
-    uint32_t lo, hi;
-
-    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
-    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
-
-    distxy = distx * disty;
-    distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
-    distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
-    distixiy =
-	16 * 16 - (disty << 4) -
-	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
-
-    lo = (tl & 0xff00ff) * distixiy;
-    hi = ((tl >> 8) & 0xff00ff) * distixiy;
-
-    lo += (tr & 0xff00ff) * distxiy;
-    hi += ((tr >> 8) & 0xff00ff) * distxiy;
-
-    lo += (bl & 0xff00ff) * distixy;
-    hi += ((bl >> 8) & 0xff00ff) * distixy;
-
-    lo += (br & 0xff00ff) * distxy;
-    hi += ((br >> 8) & 0xff00ff) * distxy;
-
-    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
-}
-
-#else
-#if SIZEOF_LONG > 4
-
-static force_inline uint32_t
-bilinear_interpolation (uint32_t tl, uint32_t tr,
-			uint32_t bl, uint32_t br,
-			int distx, int disty)
-{
-    uint64_t distxy, distxiy, distixy, distixiy;
-    uint64_t tl64, tr64, bl64, br64;
-    uint64_t f, r;
-
-    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
-    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
-
-    distxy = distx * disty;
-    distxiy = distx * (256 - disty);
-    distixy = (256 - distx) * disty;
-    distixiy = (256 - distx) * (256 - disty);
-
-    /* Alpha and Blue */
-    tl64 = tl & 0xff0000ff;
-    tr64 = tr & 0xff0000ff;
-    bl64 = bl & 0xff0000ff;
-    br64 = br & 0xff0000ff;
-
-    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
-    r = f & 0x0000ff0000ff0000ull;
-
-    /* Red and Green */
-    tl64 = tl;
-    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
-
-    tr64 = tr;
-    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
-
-    bl64 = bl;
-    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
-
-    br64 = br;
-    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
-
-    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
-    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
-
-    return (uint32_t)(r >> 16);
-}
-
-#else
-
-static force_inline uint32_t
-bilinear_interpolation (uint32_t tl, uint32_t tr,
-			uint32_t bl, uint32_t br,
-			int distx, int disty)
-{
-    int distxy, distxiy, distixy, distixiy;
-    uint32_t f, r;
-
-    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
-    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
-
-    distxy = distx * disty;
-    distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
-    distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
-    distixiy =
-	256 * 256 - (disty << 8) -
-	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
-
-    /* Blue */
-    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
-      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
-
-    /* Green */
-    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
-      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
-    r |= f & 0xff000000;
-
-    tl >>= 16;
-    tr >>= 16;
-    bl >>= 16;
-    br >>= 16;
-    r >>= 16;
-
-    /* Red */
-    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
-      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
-    r |= f & 0x00ff0000;
-
-    /* Alpha */
-    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
-      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
-    r |= f & 0xff000000;
-
-    return r;
-}
-
-#endif
-#endif // BILINEAR_INTERPOLATION_BITS <= 4
-
-static force_inline argb_t
-bilinear_interpolation_float (argb_t tl, argb_t tr,
-			      argb_t bl, argb_t br,
-			      float distx, float disty)
-{
-    float distxy, distxiy, distixy, distixiy;
-    argb_t r;
-
-    distxy = distx * disty;
-    distxiy = distx * (1.f - disty);
-    distixy = (1.f - distx) * disty;
-    distixiy = (1.f - distx) * (1.f - disty);
-
-    r.a = tl.a * distixiy + tr.a * distxiy +
-          bl.a * distixy  + br.a * distxy;
-    r.r = tl.r * distixiy + tr.r * distxiy +
-          bl.r * distixy  + br.r * distxy;
-    r.g = tl.g * distixiy + tr.g * distxiy +
-          bl.g * distixy  + br.g * distxy;
-    r.b = tl.b * distixiy + tr.b * distxiy +
-          bl.b * distixy  + br.b * distxy;
-
-    return r;
-}
-
-/*
- * For each scanline fetched from source image with PAD repeat:
- * - calculate how many pixels need to be padded on the left side
- * - calculate how many pixels need to be padded on the right side
- * - update width to only count pixels which are fetched from the image
- * All this information is returned via 'width', 'left_pad', 'right_pad'
- * arguments. The code is assuming that 'unit_x' is positive.
- *
- * Note: 64-bit math is used in order to avoid potential overflows, which
- *       is probably excessive in many cases. This particular function
- *       may need its own correctness test and performance tuning.
- */
-static force_inline void
-pad_repeat_get_scanline_bounds (int32_t         source_image_width,
-				pixman_fixed_t  vx,
-				pixman_fixed_t  unit_x,
-				int32_t *       width,
-				int32_t *       left_pad,
-				int32_t *       right_pad)
-{
-    int64_t max_vx = (int64_t) source_image_width << 16;
-    int64_t tmp;
-    if (vx < 0)
-    {
-	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
-	if (tmp > *width)
-	{
-	    *left_pad = *width;
-	    *width = 0;
-	}
-	else
-	{
-	    *left_pad = (int32_t) tmp;
-	    *width -= (int32_t) tmp;
-	}
-    }
-    else
-    {
-	*left_pad = 0;
-    }
-    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
-    if (tmp < 0)
-    {
-	*right_pad = *width;
-	*width = 0;
-    }
-    else if (tmp >= *width)
-    {
-	*right_pad = 0;
-    }
-    else
-    {
-	*right_pad = *width - (int32_t) tmp;
-	*width = (int32_t) tmp;
-    }
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instructions latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
-    565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-#define GET_x888_ALPHA(s) 0xff
-
-#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
-			      src_type_t, dst_type_t, OP, repeat_mode)				\
-static force_inline void									\
-scanline_func_name (dst_type_t       *dst,							\
-		    const src_type_t *src,							\
-		    int32_t           w,							\
-		    pixman_fixed_t    vx,							\
-		    pixman_fixed_t    unit_x,							\
-		    pixman_fixed_t    src_width_fixed,						\
-		    pixman_bool_t     fully_transparent_src)					\
-{												\
-	uint32_t   d;										\
-	src_type_t s1, s2;									\
-	uint8_t    a1, a2;									\
-	int        x1, x2;									\
-												\
-	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
-	    return;										\
-												\
-	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
-	    abort();										\
-												\
-	while ((w -= 2) >= 0)									\
-	{											\
-	    x1 = pixman_fixed_to_int (vx);							\
-	    vx += unit_x;									\
-	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
-	    {											\
-		/* This works because we know that unit_x is positive */			\
-		while (vx >= 0)									\
-		    vx -= src_width_fixed;							\
-	    }											\
-	    s1 = *(src + x1);									\
-												\
-	    x2 = pixman_fixed_to_int (vx);							\
-	    vx += unit_x;									\
-	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
-	    {											\
-		/* This works because we know that unit_x is positive */			\
-		while (vx >= 0)									\
-		    vx -= src_width_fixed;							\
-	    }											\
-	    s2 = *(src + x2);									\
-												\
-	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
-	    {											\
-		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
-		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
-												\
-		if (a1 == 0xff)									\
-		{										\
-		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
-		}										\
-		else if (s1)									\
-		{										\
-		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
-		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
-		    a1 ^= 0xff;									\
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
-		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
-		}										\
-		dst++;										\
-												\
-		if (a2 == 0xff)									\
-		{										\
-		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
-		}										\
-		else if (s2)									\
-		{										\
-		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
-		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
-		    a2 ^= 0xff;									\
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
-		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
-		}										\
-		dst++;										\
-	    }											\
-	    else /* PIXMAN_OP_SRC */								\
-	    {											\
-		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
-		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
-	    }											\
-	}											\
-												\
-	if (w & 1)										\
-	{											\
-	    x1 = pixman_fixed_to_int (vx);							\
-	    s1 = *(src + x1);									\
-												\
-	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
-	    {											\
-		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
-												\
-		if (a1 == 0xff)									\
-		{										\
-		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
-		}										\
-		else if (s1)									\
-		{										\
-		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
-		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
-		    a1 ^= 0xff;									\
-		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
-		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
-		}										\
-		dst++;										\
-	    }											\
-	    else /* PIXMAN_OP_SRC */								\
-	    {											\
-		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
-	    }											\
-	}											\
-}
-
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
-				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
-static void											\
-fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
-						   pixman_composite_info_t *info)               \
-{												\
-    PIXMAN_COMPOSITE_ARGS (info);					                        \
-    dst_type_t *dst_line;						                        \
-    mask_type_t *mask_line;									\
-    src_type_t *src_first_line;									\
-    int       y;										\
-    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);		\
-    pixman_fixed_t max_vy;									\
-    pixman_vector_t v;										\
-    pixman_fixed_t vx, vy;									\
-    pixman_fixed_t unit_x, unit_y;								\
-    int32_t left_pad, right_pad;								\
-												\
-    src_type_t *src;										\
-    dst_type_t *dst;										\
-    mask_type_t solid_mask;									\
-    const mask_type_t *mask = &solid_mask;							\
-    int src_stride, mask_stride, dst_stride;							\
-												\
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
-    if (have_mask)										\
-    {												\
-	if (mask_is_solid)									\
-	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
-	else											\
-	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
-				   mask_stride, mask_line, 1);					\
-    }												\
-    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
-     * transformed from destination space to source space */					\
-    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
-												\
-    /* reference point is the center of the pixel */						\
-    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
-    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
-    v.vector[2] = pixman_fixed_1;								\
-												\
-    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
-	return;											\
-												\
-    unit_x = src_image->common.transform->matrix[0][0];						\
-    unit_y = src_image->common.transform->matrix[1][1];						\
-												\
-    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
-    v.vector[0] -= pixman_fixed_e;								\
-    v.vector[1] -= pixman_fixed_e;								\
-												\
-    vx = v.vector[0];										\
-    vy = v.vector[1];										\
-												\
-    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
-    {												\
-	max_vy = pixman_int_to_fixed (src_image->bits.height);					\
-												\
-	/* Clamp repeating positions inside the actual samples */				\
-	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);					\
-	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
-    }												\
-												\
-    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
-	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
-    {												\
-	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
-					&width, &left_pad, &right_pad);				\
-	vx += left_pad * unit_x;								\
-    }												\
-												\
-    while (--height >= 0)									\
-    {												\
-	dst = dst_line;										\
-	dst_line += dst_stride;									\
-	if (have_mask && !mask_is_solid)							\
-	{											\
-	    mask = mask_line;									\
-	    mask_line += mask_stride;								\
-	}											\
-												\
-	y = pixman_fixed_to_int (vy);								\
-	vy += unit_y;										\
-	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
-	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
-	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
-	{											\
-	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
-	    src = src_first_line + src_stride * y;						\
-	    if (left_pad > 0)									\
-	    {											\
-		scanline_func (mask, dst,							\
-			       src + src_image->bits.width - src_image->bits.width + 1,		\
-			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
-	    }											\
-	    if (width > 0)									\
-	    {											\
-		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
-			       dst + left_pad, src + src_image->bits.width, width,		\
-			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
-	    }											\
-	    if (right_pad > 0)									\
-	    {											\
-		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
-			       dst + left_pad + width, src + src_image->bits.width,		\
-			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
-	    }											\
-	}											\
-	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
-	{											\
-	    static const src_type_t zero[1] = { 0 };						\
-	    if (y < 0 || y >= src_image->bits.height)						\
-	    {											\
-		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,		\
-			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
-		continue;									\
-	    }											\
-	    src = src_first_line + src_stride * y;						\
-	    if (left_pad > 0)									\
-	    {											\
-		scanline_func (mask, dst, zero + 1, left_pad,					\
-			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
-	    }											\
-	    if (width > 0)									\
-	    {											\
-		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
-			       dst + left_pad, src + src_image->bits.width, width,		\
-			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
-	    }											\
-	    if (right_pad > 0)									\
-	    {											\
-		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
-			       dst + left_pad + width, zero + 1, right_pad,			\
-			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
-	    }											\
-	}											\
-	else											\
-	{											\
-	    src = src_first_line + src_stride * y;						\
-	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed,	\
-			   unit_x, src_width_fixed, FALSE);					\
-	}											\
-    }												\
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
-				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
-	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
-				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
-
-#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
-			      repeat_mode)							\
-    static force_inline void									\
-    scanline_func##scale_func_name##_wrapper (							\
-		    const uint8_t    *mask,							\
-		    dst_type_t       *dst,							\
-		    const src_type_t *src,							\
-		    int32_t          w,								\
-		    pixman_fixed_t   vx,							\
-		    pixman_fixed_t   unit_x,							\
-		    pixman_fixed_t   max_vx,							\
-		    pixman_bool_t    fully_transparent_src)					\
-    {												\
-	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
-    }												\
-    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
-			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
-
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
-			      repeat_mode)							\
-	FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
-			      dst_type_t, repeat_mode)
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
-		     src_type_t, dst_type_t, OP, repeat_mode)				\
-    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
-			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
-			  OP, repeat_mode)						\
-    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
-			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
-			  src_type_t, dst_type_t, repeat_mode)
-
-
-#define SCALED_NEAREST_FLAGS						\
-    (FAST_PATH_SCALE_TRANSFORM	|					\
-     FAST_PATH_NO_ALPHA_MAP	|					\
-     FAST_PATH_NEAREST_FILTER	|					\
-     FAST_PATH_NO_ACCESSORS	|					\
-     FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_NEAREST_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
-    }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
-    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
-    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
-    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
-    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),              \
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
-
-/*****************************************************************************/
-
-/*
- * Identify 5 zones in each scanline for bilinear scaling. Depending on
- * whether 2 pixels to be interpolated are fetched from the image itself,
- * from the padding area around it or from both image and padding area.
- */
-static force_inline void
-bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
-					 pixman_fixed_t  vx,
-					 pixman_fixed_t  unit_x,
-					 int32_t *       left_pad,
-					 int32_t *       left_tz,
-					 int32_t *       width,
-					 int32_t *       right_tz,
-					 int32_t *       right_pad)
-{
-	int width1 = *width, left_pad1, right_pad1;
-	int width2 = *width, left_pad2, right_pad2;
-
-	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
-					&width1, &left_pad1, &right_pad1);
-	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
-					unit_x, &width2, &left_pad2, &right_pad2);
-
-	*left_pad = left_pad2;
-	*left_tz = left_pad1 - left_pad2;
-	*right_tz = right_pad2 - right_pad1;
-	*right_pad = right_pad1;
-	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
-}
-
-/*
- * Main loop template for single pass bilinear scaling. It needs to be
- * provided with 'scanline_func' which should do the compositing operation.
- * The needed function has the following prototype:
- *
- *	scanline_func (dst_type_t *       dst,
- *		       const mask_type_ * mask,
- *		       const src_type_t * src_top,
- *		       const src_type_t * src_bottom,
- *		       int32_t            width,
- *		       int                weight_top,
- *		       int                weight_bottom,
- *		       pixman_fixed_t     vx,
- *		       pixman_fixed_t     unit_x,
- *		       pixman_fixed_t     max_vx,
- *		       pixman_bool_t      zero_src)
- *
- * Where:
- *  dst                 - destination scanline buffer for storing results
- *  mask                - mask buffer (or single value for solid mask)
- *  src_top, src_bottom - two source scanlines
- *  width               - number of pixels to process
- *  weight_top          - weight of the top row for interpolation
- *  weight_bottom       - weight of the bottom row for interpolation
- *  vx                  - initial position for fetching the first pair of
- *                        pixels from the source buffer
- *  unit_x              - position increment needed to move to the next pair
- *                        of pixels
- *  max_vx              - image size as a fixed point value, can be used for
- *                        implementing NORMAL repeat (when it is supported)
- *  zero_src            - boolean hint variable, which is set to TRUE when
- *                        all source pixels are fetched from zero padding
- *                        zone for NONE repeat
- *
- * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
- *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
- *       for NONE repeat when handling fuzzy antialiased top or bottom image
- *       edges. Also both top and bottom weight variables are guaranteed to
- *       have value, which is less than BILINEAR_INTERPOLATION_RANGE.
- *       For example, the weights can fit into unsigned byte or be used
- *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
- *       precision.
- */
-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
-				  dst_type_t, repeat_mode, flags)				\
-static void											\
-fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
-						   pixman_composite_info_t *info)		\
-{												\
-    PIXMAN_COMPOSITE_ARGS (info);								\
-    dst_type_t *dst_line;									\
-    mask_type_t *mask_line;									\
-    src_type_t *src_first_line;									\
-    int       y1, y2;										\
-    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
-    pixman_vector_t v;										\
-    pixman_fixed_t vx, vy;									\
-    pixman_fixed_t unit_x, unit_y;								\
-    int32_t left_pad, left_tz, right_tz, right_pad;						\
-												\
-    dst_type_t *dst;										\
-    mask_type_t solid_mask;									\
-    const mask_type_t *mask = &solid_mask;							\
-    int src_stride, mask_stride, dst_stride;							\
-												\
-    int src_width;										\
-    pixman_fixed_t src_width_fixed;								\
-    int max_x;											\
-    pixman_bool_t need_src_extension;								\
-												\
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
-    if (flags & FLAG_HAVE_SOLID_MASK)								\
-    {												\
-	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
-	mask_stride = 0;									\
-    }												\
-    else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
-    {												\
-	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
-			       mask_stride, mask_line, 1);					\
-    }												\
-												\
-    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
-     * transformed from destination space to source space */					\
-    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
-												\
-    /* reference point is the center of the pixel */						\
-    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
-    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
-    v.vector[2] = pixman_fixed_1;								\
-												\
-    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
-	return;											\
-												\
-    unit_x = src_image->common.transform->matrix[0][0];						\
-    unit_y = src_image->common.transform->matrix[1][1];						\
-												\
-    v.vector[0] -= pixman_fixed_1 / 2;								\
-    v.vector[1] -= pixman_fixed_1 / 2;								\
-												\
-    vy = v.vector[1];										\
-												\
-    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
-	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
-    {												\
-	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
-					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
-	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
-	{											\
-	    /* PAD repeat does not need special handling for 'transition zones' and */		\
-	    /* they can be combined with 'padding zones' safely */				\
-	    left_pad += left_tz;								\
-	    right_pad += right_tz;								\
-	    left_tz = right_tz = 0;								\
-	}											\
-	v.vector[0] += left_pad * unit_x;							\
-    }												\
-												\
-    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
-    {												\
-	vx = v.vector[0];									\
-	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
-	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
-												\
-	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
-	{											\
-	    src_width = 0;									\
-												\
-	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
-		src_width += src_image->bits.width;						\
-												\
-	    need_src_extension = TRUE;								\
-	}											\
-	else											\
-	{											\
-	    src_width = src_image->bits.width;							\
-	    need_src_extension = FALSE;								\
-	}											\
-												\
-	src_width_fixed = pixman_int_to_fixed (src_width);					\
-    }												\
-												\
-    while (--height >= 0)									\
-    {												\
-	int weight1, weight2;									\
-	dst = dst_line;										\
-	dst_line += dst_stride;									\
-	vx = v.vector[0];									\
-	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
-	{											\
-	    mask = mask_line;									\
-	    mask_line += mask_stride;								\
-	}											\
-												\
-	y1 = pixman_fixed_to_int (vy);								\
-	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
-	if (weight2)										\
-	{											\
-	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
-	    y2 = y1 + 1;									\
-	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
-	}											\
-	else											\
-	{											\
-	    /* set both top and bottom row to the same scanline and tweak weights */		\
-	    y2 = y1;										\
-	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
-	}											\
-	vy += unit_y;										\
-	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
-	{											\
-	    src_type_t *src1, *src2;								\
-	    src_type_t buf1[2];									\
-	    src_type_t buf2[2];									\
-	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
-	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
-	    src1 = src_first_line + src_stride * y1;						\
-	    src2 = src_first_line + src_stride * y2;						\
-												\
-	    if (left_pad > 0)									\
-	    {											\
-		buf1[0] = buf1[1] = src1[0];							\
-		buf2[0] = buf2[1] = src2[0];							\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
-		dst += left_pad;								\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += left_pad;								\
-	    }											\
-	    if (width > 0)									\
-	    {											\
-		scanline_func (dst, mask,							\
-			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
-		dst += width;									\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += width;								\
-	    }											\
-	    if (right_pad > 0)									\
-	    {											\
-		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
-		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
-	    }											\
-	}											\
-	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
-	{											\
-	    src_type_t *src1, *src2;								\
-	    src_type_t buf1[2];									\
-	    src_type_t buf2[2];									\
-	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
-	    if (y1 < 0)										\
-	    {											\
-		weight1 = 0;									\
-		y1 = 0;										\
-	    }											\
-	    if (y1 >= src_image->bits.height)							\
-	    {											\
-		weight1 = 0;									\
-		y1 = src_image->bits.height - 1;						\
-	    }											\
-	    if (y2 < 0)										\
-	    {											\
-		weight2 = 0;									\
-		y2 = 0;										\
-	    }											\
-	    if (y2 >= src_image->bits.height)							\
-	    {											\
-		weight2 = 0;									\
-		y2 = src_image->bits.height - 1;						\
-	    }											\
-	    src1 = src_first_line + src_stride * y1;						\
-	    src2 = src_first_line + src_stride * y2;						\
-												\
-	    if (left_pad > 0)									\
-	    {											\
-		buf1[0] = buf1[1] = 0;								\
-		buf2[0] = buf2[1] = 0;								\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
-		dst += left_pad;								\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += left_pad;								\
-	    }											\
-	    if (left_tz > 0)									\
-	    {											\
-		buf1[0] = 0;									\
-		buf1[1] = src1[0];								\
-		buf2[0] = 0;									\
-		buf2[1] = src2[0];								\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, left_tz, weight1, weight2,				\
-			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
-		dst += left_tz;									\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += left_tz;								\
-		vx += left_tz * unit_x;								\
-	    }											\
-	    if (width > 0)									\
-	    {											\
-		scanline_func (dst, mask,							\
-			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
-		dst += width;									\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += width;								\
-		vx += width * unit_x;								\
-	    }											\
-	    if (right_tz > 0)									\
-	    {											\
-		buf1[0] = src1[src_image->bits.width - 1];					\
-		buf1[1] = 0;									\
-		buf2[0] = src2[src_image->bits.width - 1];					\
-		buf2[1] = 0;									\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, right_tz, weight1, weight2,				\
-			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
-		dst += right_tz;								\
-		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
-		    mask += right_tz;								\
-	    }											\
-	    if (right_pad > 0)									\
-	    {											\
-		buf1[0] = buf1[1] = 0;								\
-		buf2[0] = buf2[1] = 0;								\
-		scanline_func (dst, mask,							\
-			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
-	    }											\
-	}											\
-	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
-	{											\
-	    int32_t	    num_pixels;								\
-	    int32_t	    width_remain;							\
-	    src_type_t *    src_line_top;							\
-	    src_type_t *    src_line_bottom;							\
-	    src_type_t	    buf1[2];								\
-	    src_type_t	    buf2[2];								\
-	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
-	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
-	    int		    i, j;								\
-												\
-	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
-	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
-	    src_line_top = src_first_line + src_stride * y1;					\
-	    src_line_bottom = src_first_line + src_stride * y2;					\
-												\
-	    if (need_src_extension)								\
-	    {											\
-		for (i=0; i<src_width;)								\
-		{										\
-		    for (j=0; j<src_image->bits.width; j++, i++)				\
-		    {										\
-			extended_src_line0[i] = src_line_top[j];				\
-			extended_src_line1[i] = src_line_bottom[j];				\
-		    }										\
-		}										\
-												\
-		src_line_top = &extended_src_line0[0];						\
-		src_line_bottom = &extended_src_line1[0];					\
-	    }											\
-												\
-	    /* Top & Bottom wrap around buffer */						\
-	    buf1[0] = src_line_top[src_width - 1];						\
-	    buf1[1] = src_line_top[0];								\
-	    buf2[0] = src_line_bottom[src_width - 1];						\
-	    buf2[1] = src_line_bottom[0];							\
-												\
-	    width_remain = width;								\
-												\
-	    while (width_remain > 0)								\
-	    {											\
-		/* We use src_width_fixed because it can make vx in original source range */	\
-		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
-												\
-		/* Wrap around part */								\
-		if (pixman_fixed_to_int (vx) == src_width - 1)					\
-		{										\
-		    /* for positive unit_x							\
-		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
-		     *										\
-		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
-		     * So we are safe from overflow.						\
-		     */										\
-		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
-												\
-		    if (num_pixels > width_remain)						\
-			num_pixels = width_remain;						\
-												\
-		    scanline_func (dst, mask, buf1, buf2, num_pixels,				\
-				   weight1, weight2, pixman_fixed_frac(vx),			\
-				   unit_x, src_width_fixed, FALSE);				\
-												\
-		    width_remain -= num_pixels;							\
-		    vx += num_pixels * unit_x;							\
-		    dst += num_pixels;								\
-												\
-		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
-			mask += num_pixels;							\
-												\
-		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
-		}										\
-												\
-		/* Normal scanline composite */							\
-		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
-		{										\
-		    /* for positive unit_x							\
-		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
-		     *										\
-		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
-		     * So we are safe from overflow here.					\
-		     */										\
-		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
-				  / unit_x) + 1;						\
-												\
-		    if (num_pixels > width_remain)						\
-			num_pixels = width_remain;						\
-												\
-		    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,	\
-				   weight1, weight2, vx, unit_x, src_width_fixed, FALSE);	\
-												\
-		    width_remain -= num_pixels;							\
-		    vx += num_pixels * unit_x;							\
-		    dst += num_pixels;								\
-												\
-		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
-		        mask += num_pixels;							\
-		}										\
-	    }											\
-	}											\
-	else											\
-	{											\
-	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
-			   src_first_line + src_stride * y2, width,				\
-			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
-	}											\
-    }												\
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
-				  dst_type_t, repeat_mode, flags)				\
-	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
-				  dst_type_t, repeat_mode, flags)
-
-#define SCALED_BILINEAR_FLAGS						\
-    (FAST_PATH_SCALE_TRANSFORM	|					\
-     FAST_PATH_NO_ALPHA_MAP	|					\
-     FAST_PATH_BILINEAR_FILTER	|					\
-     FAST_PATH_NO_ACCESSORS	|					\
-     FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_null, 0,							\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_PAD_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NONE_REPEAT		|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
-    }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
-    {   PIXMAN_OP_ ## op,						\
-	PIXMAN_ ## s,							\
-	(SCALED_BILINEAR_FLAGS		|				\
-	 FAST_PATH_NORMAL_REPEAT	|				\
-	 FAST_PATH_X_UNIT_POSITIVE),					\
-	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
-	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
-	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
-    }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
-    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
-    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
-    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
-    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-linear-gradient.c b/vendor/pixman/pixman/pixman-linear-gradient.c
deleted file mode 100644
index 014b69ceb..000000000
--- a/vendor/pixman/pixman/pixman-linear-gradient.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdlib.h>
-#include "pixman-private.h"
-
-static pixman_bool_t
-linear_gradient_is_horizontal (pixman_image_t *image,
-			       int             x,
-			       int             y,
-			       int             width,
-			       int             height)
-{
-    linear_gradient_t *linear = (linear_gradient_t *)image;
-    pixman_vector_t v;
-    pixman_fixed_32_32_t l;
-    pixman_fixed_48_16_t dx, dy;
-    double inc;
-
-    if (image->common.transform)
-    {
-	/* projective transformation */
-	if (image->common.transform->matrix[2][0] != 0 ||
-	    image->common.transform->matrix[2][1] != 0 ||
-	    image->common.transform->matrix[2][2] == 0)
-	{
-	    return FALSE;
-	}
-
-	v.vector[0] = image->common.transform->matrix[0][1];
-	v.vector[1] = image->common.transform->matrix[1][1];
-	v.vector[2] = image->common.transform->matrix[2][2];
-    }
-    else
-    {
-	v.vector[0] = 0;
-	v.vector[1] = pixman_fixed_1;
-	v.vector[2] = pixman_fixed_1;
-    }
-
-    dx = linear->p2.x - linear->p1.x;
-    dy = linear->p2.y - linear->p1.y;
-
-    l = dx * dx + dy * dy;
-
-    if (l == 0)
-	return FALSE;
-
-    /*
-     * compute how much the input of the gradient walked changes
-     * when moving vertically through the whole image
-     */
-    inc = height * (double) pixman_fixed_1 * pixman_fixed_1 *
-	(dx * v.vector[0] + dy * v.vector[1]) /
-	(v.vector[2] * (double) l);
-
-    /* check that casting to integer would result in 0 */
-    if (-1 < inc && inc < 1)
-	return TRUE;
-
-    return FALSE;
-}
-
-static uint32_t *
-linear_get_scanline (pixman_iter_t                 *iter,
-		     const uint32_t                *mask,
-		     int                            Bpp,
-		     pixman_gradient_walker_write_t write_pixel,
-		     pixman_gradient_walker_fill_t  fill_pixel)
-{
-    pixman_image_t *image  = iter->image;
-    int             x      = iter->x;
-    int             y      = iter->y;
-    int             width  = iter->width;
-    uint32_t *      buffer = iter->buffer;
-
-    pixman_vector_t v, unit;
-    pixman_fixed_32_32_t l;
-    pixman_fixed_48_16_t dx, dy;
-    gradient_t *gradient = (gradient_t *)image;
-    linear_gradient_t *linear = (linear_gradient_t *)image;
-    uint32_t *end = buffer + width * (Bpp / 4);
-    pixman_gradient_walker_t walker;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return iter->buffer;
-
-	unit.vector[0] = image->common.transform->matrix[0][0];
-	unit.vector[1] = image->common.transform->matrix[1][0];
-	unit.vector[2] = image->common.transform->matrix[2][0];
-    }
-    else
-    {
-	unit.vector[0] = pixman_fixed_1;
-	unit.vector[1] = 0;
-	unit.vector[2] = 0;
-    }
-
-    dx = linear->p2.x - linear->p1.x;
-    dy = linear->p2.y - linear->p1.y;
-
-    l = dx * dx + dy * dy;
-
-    if (l == 0 || unit.vector[2] == 0)
-    {
-	/* affine transformation only */
-	pixman_fixed_32_32_t t, next_inc;
-	double inc;
-
-	if (l == 0 || v.vector[2] == 0)
-	{
-	    t = 0;
-	    inc = 0;
-	}
-	else
-	{
-	    double invden, v2;
-
-	    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
-		(l * (double) v.vector[2]);
-	    v2 = v.vector[2] * (1. / pixman_fixed_1);
-	    t = ((dx * v.vector[0] + dy * v.vector[1]) -
-		 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
-	    inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
-	}
-	next_inc = 0;
-
-	if (((pixman_fixed_32_32_t )(inc * width)) == 0)
-	{
-	    fill_pixel (&walker, t, buffer, end);
-	}
-	else
-	{
-	    int i;
-
-	    i = 0;
-	    while (buffer < end)
-	    {
-		if (!mask || *mask++)
-		{
-		    write_pixel (&walker, t + next_inc, buffer);
-		}
-		i++;
-		next_inc = inc * i;
-		buffer += (Bpp / 4);
-	    }
-	}
-    }
-    else
-    {
-	/* projective transformation */
-        double t;
-
-	t = 0;
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-	        if (v.vector[2] != 0)
-		{
-		    double invden, v2;
-
-		    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
-			(l * (double) v.vector[2]);
-		    v2 = v.vector[2] * (1. / pixman_fixed_1);
-		    t = ((dx * v.vector[0] + dy * v.vector[1]) -
-			 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
-		}
-
-		write_pixel (&walker, t, buffer);
-	    }
-
-	    buffer += (Bpp / 4);
-
-	    v.vector[0] += unit.vector[0];
-	    v.vector[1] += unit.vector[1];
-	    v.vector[2] += unit.vector[2];
-	}
-    }
-
-    iter->y++;
-
-    return iter->buffer;
-}
-
-static uint32_t *
-linear_get_scanline_narrow (pixman_iter_t  *iter,
-			    const uint32_t *mask)
-{
-    return linear_get_scanline (iter, mask, 4,
-				_pixman_gradient_walker_write_narrow,
-				_pixman_gradient_walker_fill_narrow);
-}
-
-
-static uint32_t *
-linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return linear_get_scanline (iter, NULL, 16,
-				_pixman_gradient_walker_write_wide,
-				_pixman_gradient_walker_fill_wide);
-}
-
-void
-_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t  *iter)
-{
-    if (linear_gradient_is_horizontal (
-	    iter->image, iter->x, iter->y, iter->width, iter->height))
-    {
-	if (iter->iter_flags & ITER_NARROW)
-	    linear_get_scanline_narrow (iter, NULL);
-	else
-	    linear_get_scanline_wide (iter, NULL);
-
-	iter->get_scanline = _pixman_iter_get_scanline_noop;
-    }
-    else
-    {
-	if (iter->iter_flags & ITER_NARROW)
-	    iter->get_scanline = linear_get_scanline_narrow;
-	else
-	    iter->get_scanline = linear_get_scanline_wide;
-    }
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_linear_gradient (const pixman_point_fixed_t *  p1,
-                                     const pixman_point_fixed_t *  p2,
-                                     const pixman_gradient_stop_t *stops,
-                                     int                           n_stops)
-{
-    pixman_image_t *image;
-    linear_gradient_t *linear;
-
-    image = _pixman_image_allocate ();
-
-    if (!image)
-	return NULL;
-
-    linear = &image->linear;
-
-    if (!_pixman_init_gradient (&linear->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    linear->p1 = *p1;
-    linear->p2 = *p2;
-
-    image->type = LINEAR;
-
-    return image;
-}
-
diff --git a/vendor/pixman/pixman/pixman-matrix.c b/vendor/pixman/pixman/pixman-matrix.c
deleted file mode 100644
index da5209cbe..000000000
--- a/vendor/pixman/pixman/pixman-matrix.c
+++ /dev/null
@@ -1,1073 +0,0 @@
-/*
- * Copyright © 2008 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that copyright
- * notice and this permission notice appear in supporting documentation, and
- * that the name of the copyright holders not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  The copyright holders make no representations
- * about the suitability of this software for any purpose.  It is provided "as
- * is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
- */
-
-/*
- * Matrix interfaces
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-#include "pixman-private.h"
-
-#define F(x)    pixman_int_to_fixed (x)
-
-static force_inline int
-count_leading_zeros (uint32_t x)
-{
-#ifdef HAVE_BUILTIN_CLZ
-    return __builtin_clz (x);
-#else
-    int n = 0;
-    while (x)
-    {
-        n++;
-        x >>= 1;
-    }
-    return 32 - n;
-#endif
-}
-
-/*
- * Large signed/unsigned integer division with rounding for the platforms with
- * only 64-bit integer data type supported (no 128-bit data type).
- *
- * Arguments:
- *     hi, lo - high and low 64-bit parts of the dividend
- *     div    - 48-bit divisor
- *
- * Returns: lowest 64 bits of the result as a return value and highest 64
- *          bits of the result to "result_hi" pointer
- */
-
-/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */
-static force_inline uint64_t
-rounded_udiv_128_by_48 (uint64_t  hi,
-                        uint64_t  lo,
-                        uint64_t  div,
-                        uint64_t *result_hi)
-{
-    uint64_t tmp, remainder, result_lo;
-    assert(div < ((uint64_t)1 << 48));
-
-    remainder = hi % div;
-    *result_hi = hi / div;
-
-    tmp = (remainder << 16) + (lo >> 48);
-    result_lo = tmp / div;
-    remainder = tmp % div;
-
-    tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF);
-    result_lo = (result_lo << 16) + (tmp / div);
-    remainder = tmp % div;
-
-    tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF);
-    result_lo = (result_lo << 16) + (tmp / div);
-    remainder = tmp % div;
-
-    tmp = (remainder << 16) + (lo & 0xFFFF);
-    result_lo = (result_lo << 16) + (tmp / div);
-    remainder = tmp % div;
-
-    /* round to nearest */
-    if (remainder * 2 >= div && ++result_lo == 0)
-        *result_hi += 1;
-
-    return result_lo;
-}
-
-/* signed division (128-bit by 49-bit) with rounding to nearest */
-static inline int64_t
-rounded_sdiv_128_by_49 (int64_t   hi,
-                        uint64_t  lo,
-                        int64_t   div,
-                        int64_t  *signed_result_hi)
-{
-    uint64_t result_lo, result_hi;
-    int sign = 0;
-    if (div < 0)
-    {
-        div = -div;
-        sign ^= 1;
-    }
-    if (hi < 0)
-    {
-        if (lo != 0)
-            hi++;
-        hi = -hi;
-        lo = -lo;
-        sign ^= 1;
-    }
-    result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
-    if (sign)
-    {
-        if (result_lo != 0)
-            result_hi++;
-        result_hi = -result_hi;
-        result_lo = -result_lo;
-    }
-    if (signed_result_hi)
-    {
-        *signed_result_hi = result_hi;
-    }
-    return result_lo;
-}
-
-/*
- * Multiply 64.16 fixed point value by (2^scalebits) and convert
- * to 128-bit integer.
- */
-static force_inline void
-fixed_64_16_to_int128 (int64_t  hi,
-                       int64_t  lo,
-                       int64_t *rhi,
-                       int64_t *rlo,
-                       int      scalebits)
-{
-    /* separate integer and fractional parts */
-    hi += lo >> 16;
-    lo &= 0xFFFF;
-
-    if (scalebits <= 0)
-    {
-        *rlo = hi >> (-scalebits);
-        *rhi = *rlo >> 63;
-    }
-    else
-    {
-        *rhi = hi >> (64 - scalebits);
-        *rlo = (uint64_t)hi << scalebits;
-        if (scalebits < 16)
-            *rlo += lo >> (16 - scalebits);
-        else
-            *rlo += lo << (scalebits - 16);
-    }
-}
-
-/*
- * Convert 112.16 fixed point value to 48.16 with clamping for the out
- * of range values.
- */
-static force_inline pixman_fixed_48_16_t
-fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
-{
-    if ((lo >> 63) != hi)
-    {
-        *clampflag = TRUE;
-        return hi >= 0 ? INT64_MAX : INT64_MIN;
-    }
-    else
-    {
-        return lo;
-    }
-}
-
-/*
- * Transform a point with 31.16 fixed point coordinates from the destination
- * space to a point with 48.16 fixed point coordinates in the source space.
- * No overflows are possible for affine transformations and the results are
- * accurate including the least significant bit. Projective transformations
- * may overflow, in this case the results are just clamped to return maximum
- * or minimum 48.16 values (so that the caller can at least handle the NONE
- * and PAD repeats correctly) and the return value is FALSE to indicate that
- * such clamping has happened.
- */
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point_31_16 (const pixman_transform_t    *t,
-                              const pixman_vector_48_16_t *v,
-                              pixman_vector_48_16_t       *result)
-{
-    pixman_bool_t clampflag = FALSE;
-    int i;
-    int64_t tmp[3][2], divint;
-    uint16_t divfrac;
-
-    /* input vector values must have no more than 31 bits (including sign)
-     * in the integer part */
-    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-
-    for (i = 0; i < 3; i++)
-    {
-        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
-        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
-        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
-        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
-        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
-        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
-    }
-
-    /*
-     * separate 64-bit integer and 16-bit fractional parts for the divisor,
-     * which is also scaled by 65536 after fixed point multiplication.
-     */
-    divint  = tmp[2][0] + (tmp[2][1] >> 16);
-    divfrac = tmp[2][1] & 0xFFFF;
-
-    if (divint == pixman_fixed_1 && divfrac == 0)
-    {
-        /*
-         * this is a simple affine transformation
-         */
-        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
-        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
-        result->v[2] = pixman_fixed_1;
-    }
-    else if (divint == 0 && divfrac == 0)
-    {
-        /*
-         * handle zero divisor (if the values are non-zero, set the
-         * results to maximum positive or minimum negative)
-         */
-        clampflag = TRUE;
-
-        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
-        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
-
-        if (result->v[0] > 0)
-            result->v[0] = INT64_MAX;
-        else if (result->v[0] < 0)
-            result->v[0] = INT64_MIN;
-
-        if (result->v[1] > 0)
-            result->v[1] = INT64_MAX;
-        else if (result->v[1] < 0)
-            result->v[1] = INT64_MIN;
-    }
-    else
-    {
-        /*
-         * projective transformation, analyze the top 32 bits of the divisor
-         */
-        int32_t hi32divbits = divint >> 32;
-        if (hi32divbits < 0)
-            hi32divbits = ~hi32divbits;
-
-        if (hi32divbits == 0)
-        {
-            /* the divisor is small, we can actually keep all the bits */
-            int64_t hi, rhi, lo, rlo;
-            int64_t div = ((uint64_t)divint << 16) + divfrac;
-
-            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
-            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
-            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
-
-            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
-            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
-            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
-        }
-        else
-        {
-            /* the divisor needs to be reduced to 48 bits */
-            int64_t hi, rhi, lo, rlo, div;
-            int shift = 32 - count_leading_zeros (hi32divbits);
-            fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift);
-
-            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift);
-            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
-            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
-
-            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
-            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
-            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
-        }
-    }
-    result->v[2] = pixman_fixed_1;
-    return !clampflag;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_point_31_16_affine (const pixman_transform_t    *t,
-                                     const pixman_vector_48_16_t *v,
-                                     pixman_vector_48_16_t       *result)
-{
-    int64_t hi0, lo0, hi1, lo1;
-
-    /* input vector values must have no more than 31 bits (including sign)
-     * in the integer part */
-    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-
-    hi0  = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
-    lo0  = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
-    hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
-    lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
-    hi0 += (int64_t)t->matrix[0][2];
-
-    hi1  = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
-    lo1  = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
-    hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
-    lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
-    hi1 += (int64_t)t->matrix[1][2];
-
-    result->v[0] = hi0 + ((lo0 + 0x8000) >> 16);
-    result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
-    result->v[2] = pixman_fixed_1;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
-                                 const pixman_vector_48_16_t *v,
-                                 pixman_vector_48_16_t       *result)
-{
-    int i;
-    int64_t tmp[3][2];
-
-    /* input vector values must have no more than 31 bits (including sign)
-     * in the integer part */
-    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
-    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
-
-    for (i = 0; i < 3; i++)
-    {
-        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
-        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
-        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
-        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
-        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
-        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
-    }
-
-    result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
-    result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
-    result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_identity (struct pixman_transform *matrix)
-{
-    int i;
-
-    memset (matrix, '\0', sizeof (struct pixman_transform));
-    for (i = 0; i < 3; i++)
-	matrix->matrix[i][i] = F (1);
-}
-
-typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point_3d (const struct pixman_transform *transform,
-                           struct pixman_vector *         vector)
-{
-    pixman_vector_48_16_t tmp;
-    tmp.v[0] = vector->vector[0];
-    tmp.v[1] = vector->vector[1];
-    tmp.v[2] = vector->vector[2];
-
-    pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
-
-    vector->vector[0] = tmp.v[0];
-    vector->vector[1] = tmp.v[1];
-    vector->vector[2] = tmp.v[2];
-
-    return vector->vector[0] == tmp.v[0] &&
-           vector->vector[1] == tmp.v[1] &&
-           vector->vector[2] == tmp.v[2];
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point (const struct pixman_transform *transform,
-                        struct pixman_vector *         vector)
-{
-    pixman_vector_48_16_t tmp;
-    tmp.v[0] = vector->vector[0];
-    tmp.v[1] = vector->vector[1];
-    tmp.v[2] = vector->vector[2];
-
-    if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
-        return FALSE;
-
-    vector->vector[0] = tmp.v[0];
-    vector->vector[1] = tmp.v[1];
-    vector->vector[2] = tmp.v[2];
-
-    return vector->vector[0] == tmp.v[0] &&
-           vector->vector[1] == tmp.v[1] &&
-           vector->vector[2] == tmp.v[2];
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_multiply (struct pixman_transform *      dst,
-                           const struct pixman_transform *l,
-                           const struct pixman_transform *r)
-{
-    struct pixman_transform d;
-    int dx, dy;
-    int o;
-
-    for (dy = 0; dy < 3; dy++)
-    {
-	for (dx = 0; dx < 3; dx++)
-	{
-	    pixman_fixed_48_16_t v;
-	    pixman_fixed_32_32_t partial;
-	    
-	    v = 0;
-	    for (o = 0; o < 3; o++)
-	    {
-		partial =
-		    (pixman_fixed_32_32_t) l->matrix[dy][o] *
-		    (pixman_fixed_32_32_t) r->matrix[o][dx];
-
-		v += (partial + 0x8000) >> 16;
-	    }
-
-	    if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
-		return FALSE;
-	    
-	    d.matrix[dy][dx] = (pixman_fixed_t) v;
-	}
-    }
-
-    *dst = d;
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_scale (struct pixman_transform *t,
-                             pixman_fixed_t           sx,
-                             pixman_fixed_t           sy)
-{
-    memset (t, '\0', sizeof (struct pixman_transform));
-
-    t->matrix[0][0] = sx;
-    t->matrix[1][1] = sy;
-    t->matrix[2][2] = F (1);
-}
-
-static pixman_fixed_t
-fixed_inverse (pixman_fixed_t x)
-{
-    return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_scale (struct pixman_transform *forward,
-                        struct pixman_transform *reverse,
-                        pixman_fixed_t           sx,
-                        pixman_fixed_t           sy)
-{
-    struct pixman_transform t;
-
-    if (sx == 0 || sy == 0)
-	return FALSE;
-
-    if (forward)
-    {
-	pixman_transform_init_scale (&t, sx, sy);
-	if (!pixman_transform_multiply (forward, &t, forward))
-	    return FALSE;
-    }
-    
-    if (reverse)
-    {
-	pixman_transform_init_scale (&t, fixed_inverse (sx),
-	                             fixed_inverse (sy));
-	if (!pixman_transform_multiply (reverse, reverse, &t))
-	    return FALSE;
-    }
-    
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_rotate (struct pixman_transform *t,
-                              pixman_fixed_t           c,
-                              pixman_fixed_t           s)
-{
-    memset (t, '\0', sizeof (struct pixman_transform));
-
-    t->matrix[0][0] = c;
-    t->matrix[0][1] = -s;
-    t->matrix[1][0] = s;
-    t->matrix[1][1] = c;
-    t->matrix[2][2] = F (1);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_rotate (struct pixman_transform *forward,
-                         struct pixman_transform *reverse,
-                         pixman_fixed_t           c,
-                         pixman_fixed_t           s)
-{
-    struct pixman_transform t;
-
-    if (forward)
-    {
-	pixman_transform_init_rotate (&t, c, s);
-	if (!pixman_transform_multiply (forward, &t, forward))
-	    return FALSE;
-    }
-
-    if (reverse)
-    {
-	pixman_transform_init_rotate (&t, c, -s);
-	if (!pixman_transform_multiply (reverse, reverse, &t))
-	    return FALSE;
-    }
-    
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_translate (struct pixman_transform *t,
-                                 pixman_fixed_t           tx,
-                                 pixman_fixed_t           ty)
-{
-    memset (t, '\0', sizeof (struct pixman_transform));
-
-    t->matrix[0][0] = F (1);
-    t->matrix[0][2] = tx;
-    t->matrix[1][1] = F (1);
-    t->matrix[1][2] = ty;
-    t->matrix[2][2] = F (1);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_translate (struct pixman_transform *forward,
-                            struct pixman_transform *reverse,
-                            pixman_fixed_t           tx,
-                            pixman_fixed_t           ty)
-{
-    struct pixman_transform t;
-
-    if (forward)
-    {
-	pixman_transform_init_translate (&t, tx, ty);
-
-	if (!pixman_transform_multiply (forward, &t, forward))
-	    return FALSE;
-    }
-
-    if (reverse)
-    {
-	pixman_transform_init_translate (&t, -tx, -ty);
-
-	if (!pixman_transform_multiply (reverse, reverse, &t))
-	    return FALSE;
-    }
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_bounds (const struct pixman_transform *matrix,
-                         struct pixman_box16 *          b)
-
-{
-    struct pixman_vector v[4];
-    int i;
-    int x1, y1, x2, y2;
-
-    v[0].vector[0] = F (b->x1);
-    v[0].vector[1] = F (b->y1);
-    v[0].vector[2] = F (1);
-
-    v[1].vector[0] = F (b->x2);
-    v[1].vector[1] = F (b->y1);
-    v[1].vector[2] = F (1);
-
-    v[2].vector[0] = F (b->x2);
-    v[2].vector[1] = F (b->y2);
-    v[2].vector[2] = F (1);
-
-    v[3].vector[0] = F (b->x1);
-    v[3].vector[1] = F (b->y2);
-    v[3].vector[2] = F (1);
-
-    for (i = 0; i < 4; i++)
-    {
-	if (!pixman_transform_point (matrix, &v[i]))
-	    return FALSE;
-
-	x1 = pixman_fixed_to_int (v[i].vector[0]);
-	y1 = pixman_fixed_to_int (v[i].vector[1]);
-	x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0]));
-	y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1]));
-
-	if (i == 0)
-	{
-	    b->x1 = x1;
-	    b->y1 = y1;
-	    b->x2 = x2;
-	    b->y2 = y2;
-	}
-	else
-	{
-	    if (x1 < b->x1) b->x1 = x1;
-	    if (y1 < b->y1) b->y1 = y1;
-	    if (x2 > b->x2) b->x2 = x2;
-	    if (y2 > b->y2) b->y2 = y2;
-	}
-    }
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_invert (struct pixman_transform *      dst,
-                         const struct pixman_transform *src)
-{
-    struct pixman_f_transform m;
-
-    pixman_f_transform_from_pixman_transform (&m, src);
-
-    if (!pixman_f_transform_invert (&m, &m))
-	return FALSE;
-
-    if (!pixman_transform_from_pixman_f_transform (dst, &m))
-	return FALSE;
-
-    return TRUE;
-}
-
-static pixman_bool_t
-within_epsilon (pixman_fixed_t a,
-                pixman_fixed_t b,
-                pixman_fixed_t epsilon)
-{
-    pixman_fixed_t t = a - b;
-
-    if (t < 0)
-	t = -t;
-
-    return t <= epsilon;
-}
-
-#define EPSILON (pixman_fixed_t) (2)
-
-#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON))
-#define IS_ZERO(a)    (within_epsilon (a, 0, EPSILON))
-#define IS_ONE(a)     (within_epsilon (a, F (1), EPSILON))
-#define IS_UNIT(a)			    \
-    (within_epsilon (a, F (1), EPSILON) ||  \
-     within_epsilon (a, F (-1), EPSILON) || \
-     IS_ZERO (a))
-#define IS_INT(a)    (IS_ZERO (pixman_fixed_frac (a)))
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_identity (const struct pixman_transform *t)
-{
-    return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) &&
-	    IS_SAME (t->matrix[0][0], t->matrix[2][2]) &&
-	    !IS_ZERO (t->matrix[0][0]) &&
-	    IS_ZERO (t->matrix[0][1]) &&
-	    IS_ZERO (t->matrix[0][2]) &&
-	    IS_ZERO (t->matrix[1][0]) &&
-	    IS_ZERO (t->matrix[1][2]) &&
-	    IS_ZERO (t->matrix[2][0]) &&
-	    IS_ZERO (t->matrix[2][1]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_scale (const struct pixman_transform *t)
-{
-    return (!IS_ZERO (t->matrix[0][0]) &&
-            IS_ZERO (t->matrix[0][1]) &&
-            IS_ZERO (t->matrix[0][2]) &&
-
-            IS_ZERO (t->matrix[1][0]) &&
-            !IS_ZERO (t->matrix[1][1]) &&
-            IS_ZERO (t->matrix[1][2]) &&
-
-            IS_ZERO (t->matrix[2][0]) &&
-            IS_ZERO (t->matrix[2][1]) &&
-            !IS_ZERO (t->matrix[2][2]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_int_translate (const struct pixman_transform *t)
-{
-    return (IS_ONE (t->matrix[0][0]) &&
-            IS_ZERO (t->matrix[0][1]) &&
-            IS_INT (t->matrix[0][2]) &&
-
-            IS_ZERO (t->matrix[1][0]) &&
-            IS_ONE (t->matrix[1][1]) &&
-            IS_INT (t->matrix[1][2]) &&
-
-            IS_ZERO (t->matrix[2][0]) &&
-            IS_ZERO (t->matrix[2][1]) &&
-            IS_ONE (t->matrix[2][2]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_inverse (const struct pixman_transform *a,
-                             const struct pixman_transform *b)
-{
-    struct pixman_transform t;
-
-    if (!pixman_transform_multiply (&t, a, b))
-	return FALSE;
-
-    return pixman_transform_is_identity (&t);
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_from_pixman_transform (struct pixman_f_transform *    ft,
-                                          const struct pixman_transform *t)
-{
-    int i, j;
-
-    for (j = 0; j < 3; j++)
-    {
-	for (i = 0; i < 3; i++)
-	    ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]);
-    }
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_from_pixman_f_transform (struct pixman_transform *        t,
-                                          const struct pixman_f_transform *ft)
-{
-    int i, j;
-
-    for (j = 0; j < 3; j++)
-    {
-	for (i = 0; i < 3; i++)
-	{
-	    double d = ft->m[j][i];
-	    if (d < -32767.0 || d > 32767.0)
-		return FALSE;
-	    d = d * 65536.0 + 0.5;
-	    t->matrix[j][i] = (pixman_fixed_t) floor (d);
-	}
-    }
-    
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_invert (struct pixman_f_transform *      dst,
-                           const struct pixman_f_transform *src)
-{
-    static const int a[3] = { 2, 2, 1 };
-    static const int b[3] = { 1, 0, 0 };
-    pixman_f_transform_t d;
-    double det;
-    int i, j;
-
-    det = 0;
-    for (i = 0; i < 3; i++)
-    {
-	double p;
-	int ai = a[i];
-	int bi = b[i];
-	p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] -
-	                    src->m[ai][1] * src->m[bi][2]);
-	if (i == 1)
-	    p = -p;
-	det += p;
-    }
-    
-    if (det == 0)
-	return FALSE;
-    
-    det = 1 / det;
-    for (j = 0; j < 3; j++)
-    {
-	for (i = 0; i < 3; i++)
-	{
-	    double p;
-	    int ai = a[i];
-	    int aj = a[j];
-	    int bi = b[i];
-	    int bj = b[j];
-
-	    p = (src->m[ai][aj] * src->m[bi][bj] -
-	         src->m[ai][bj] * src->m[bi][aj]);
-	    
-	    if (((i + j) & 1) != 0)
-		p = -p;
-	    
-	    d.m[j][i] = det * p;
-	}
-    }
-
-    *dst = d;
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_point (const struct pixman_f_transform *t,
-                          struct pixman_f_vector *         v)
-{
-    struct pixman_f_vector result;
-    int i, j;
-    double a;
-
-    for (j = 0; j < 3; j++)
-    {
-	a = 0;
-	for (i = 0; i < 3; i++)
-	    a += t->m[j][i] * v->v[i];
-	result.v[j] = a;
-    }
-    
-    if (!result.v[2])
-	return FALSE;
-
-    for (j = 0; j < 2; j++)
-	v->v[j] = result.v[j] / result.v[2];
-
-    v->v[2] = 1;
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_point_3d (const struct pixman_f_transform *t,
-                             struct pixman_f_vector *         v)
-{
-    struct pixman_f_vector result;
-    int i, j;
-    double a;
-
-    for (j = 0; j < 3; j++)
-    {
-	a = 0;
-	for (i = 0; i < 3; i++)
-	    a += t->m[j][i] * v->v[i];
-	result.v[j] = a;
-    }
-    
-    *v = result;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_multiply (struct pixman_f_transform *      dst,
-                             const struct pixman_f_transform *l,
-                             const struct pixman_f_transform *r)
-{
-    struct pixman_f_transform d;
-    int dx, dy;
-    int o;
-
-    for (dy = 0; dy < 3; dy++)
-    {
-	for (dx = 0; dx < 3; dx++)
-	{
-	    double v = 0;
-	    for (o = 0; o < 3; o++)
-		v += l->m[dy][o] * r->m[o][dx];
-	    d.m[dy][dx] = v;
-	}
-    }
-    
-    *dst = d;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_scale (struct pixman_f_transform *t,
-                               double                     sx,
-                               double                     sy)
-{
-    t->m[0][0] = sx;
-    t->m[0][1] = 0;
-    t->m[0][2] = 0;
-    t->m[1][0] = 0;
-    t->m[1][1] = sy;
-    t->m[1][2] = 0;
-    t->m[2][0] = 0;
-    t->m[2][1] = 0;
-    t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_scale (struct pixman_f_transform *forward,
-                          struct pixman_f_transform *reverse,
-                          double                     sx,
-                          double                     sy)
-{
-    struct pixman_f_transform t;
-
-    if (sx == 0 || sy == 0)
-	return FALSE;
-
-    if (forward)
-    {
-	pixman_f_transform_init_scale (&t, sx, sy);
-	pixman_f_transform_multiply (forward, &t, forward);
-    }
-    
-    if (reverse)
-    {
-	pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy);
-	pixman_f_transform_multiply (reverse, reverse, &t);
-    }
-    
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_rotate (struct pixman_f_transform *t,
-                                double                     c,
-                                double                     s)
-{
-    t->m[0][0] = c;
-    t->m[0][1] = -s;
-    t->m[0][2] = 0;
-    t->m[1][0] = s;
-    t->m[1][1] = c;
-    t->m[1][2] = 0;
-    t->m[2][0] = 0;
-    t->m[2][1] = 0;
-    t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_rotate (struct pixman_f_transform *forward,
-                           struct pixman_f_transform *reverse,
-                           double                     c,
-                           double                     s)
-{
-    struct pixman_f_transform t;
-
-    if (forward)
-    {
-	pixman_f_transform_init_rotate (&t, c, s);
-	pixman_f_transform_multiply (forward, &t, forward);
-    }
-    
-    if (reverse)
-    {
-	pixman_f_transform_init_rotate (&t, c, -s);
-	pixman_f_transform_multiply (reverse, reverse, &t);
-    }
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_translate (struct pixman_f_transform *t,
-                                   double                     tx,
-                                   double                     ty)
-{
-    t->m[0][0] = 1;
-    t->m[0][1] = 0;
-    t->m[0][2] = tx;
-    t->m[1][0] = 0;
-    t->m[1][1] = 1;
-    t->m[1][2] = ty;
-    t->m[2][0] = 0;
-    t->m[2][1] = 0;
-    t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_translate (struct pixman_f_transform *forward,
-                              struct pixman_f_transform *reverse,
-                              double                     tx,
-                              double                     ty)
-{
-    struct pixman_f_transform t;
-
-    if (forward)
-    {
-	pixman_f_transform_init_translate (&t, tx, ty);
-	pixman_f_transform_multiply (forward, &t, forward);
-    }
-
-    if (reverse)
-    {
-	pixman_f_transform_init_translate (&t, -tx, -ty);
-	pixman_f_transform_multiply (reverse, reverse, &t);
-    }
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_bounds (const struct pixman_f_transform *t,
-                           struct pixman_box16 *            b)
-{
-    struct pixman_f_vector v[4];
-    int i;
-    int x1, y1, x2, y2;
-
-    v[0].v[0] = b->x1;
-    v[0].v[1] = b->y1;
-    v[0].v[2] = 1;
-    v[1].v[0] = b->x2;
-    v[1].v[1] = b->y1;
-    v[1].v[2] = 1;
-    v[2].v[0] = b->x2;
-    v[2].v[1] = b->y2;
-    v[2].v[2] = 1;
-    v[3].v[0] = b->x1;
-    v[3].v[1] = b->y2;
-    v[3].v[2] = 1;
-
-    for (i = 0; i < 4; i++)
-    {
-	if (!pixman_f_transform_point (t, &v[i]))
-	    return FALSE;
-
-	x1 = floor (v[i].v[0]);
-	y1 = floor (v[i].v[1]);
-	x2 = ceil (v[i].v[0]);
-	y2 = ceil (v[i].v[1]);
-
-	if (i == 0)
-	{
-	    b->x1 = x1;
-	    b->y1 = y1;
-	    b->x2 = x2;
-	    b->y2 = y2;
-	}
-	else
-	{
-	    if (x1 < b->x1) b->x1 = x1;
-	    if (y1 < b->y1) b->y1 = y1;
-	    if (x2 > b->x2) b->x2 = x2;
-	    if (y2 > b->y2) b->y2 = y2;
-	}
-    }
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_identity (struct pixman_f_transform *t)
-{
-    int i, j;
-
-    for (j = 0; j < 3; j++)
-    {
-	for (i = 0; i < 3; i++)
-	    t->m[j][i] = i == j ? 1 : 0;
-    }
-}
diff --git a/vendor/pixman/pixman/pixman-mips-dspr2-asm.S b/vendor/pixman/pixman/pixman-mips-dspr2-asm.S
deleted file mode 100644
index 9dad163b7..000000000
--- a/vendor/pixman/pixman/pixman-mips-dspr2-asm.S
+++ /dev/null
@@ -1,4283 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
- */
-
-#include "pixman-private.h"
-#include "pixman-mips-dspr2-asm.h"
-
-LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
-/*
- * a0 - *dest
- * a1 - count (bytes)
- * a2 - value to fill buffer with
- */
-
-    beqz     a1, 3f
-     andi    t1, a0, 0x0002
-    beqz     t1, 0f          /* check if address is 4-byte aligned */
-     nop
-    sh       a2, 0(a0)
-    addiu    a0, a0, 2
-    addiu    a1, a1, -2
-0:
-    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
-    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
-    beqz     t1, 2f
-     nop
-1:
-    addiu    t1, t1, -1
-    beqz     t1, 11f
-     addiu   a1, a1, -32
-    pref     30, 32(a0)
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-11:
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    addiu    a0, a0, 32
-2:
-    blez     a1, 3f
-     addiu   a1, a1, -2
-    sh       a2, 0(a0)
-    b        2b
-     addiu   a0, a0, 2
-3:
-    jr       ra
-     nop
-
-END(pixman_fill_buff16_mips)
-
-LEAF_MIPS32R2(pixman_fill_buff32_mips)
-/*
- * a0 - *dest
- * a1 - count (bytes)
- * a2 - value to fill buffer with
- */
-
-    beqz     a1, 3f
-     nop
-    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
-    beqz     t1, 2f
-     nop
-1:
-    addiu    t1, t1, -1
-    beqz     t1, 11f
-     addiu   a1, a1, -32
-    pref     30, 32(a0)
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-11:
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    addiu    a0, a0, 32
-2:
-    blez     a1, 3f
-     addiu   a1, a1, -4
-    sw       a2, 0(a0)
-    b        2b
-     addiu   a0, a0, 4
-3:
-    jr       ra
-     nop
-
-END(pixman_fill_buff32_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
-/*
- * a0 - dst (r5g6b5)
- * a1 - src (a8r8g8b8)
- * a2 - w
- */
-
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-    li       t4, 0xf800f800
-    li       t5, 0x07e007e0
-    li       t6, 0x001f001f
-1:
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    addiu    a1, a1, 8
-    addiu    a2, a2, -2
-
-    CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8
-
-    sh       t2, 0(a0)
-    sh       t3, 2(a0)
-
-    addiu    t2, a2, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a2, 3f
-     nop
-    lw       t0, 0(a1)
-
-    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
-
-    sh       t1, 0(a0)
-3:
-    j        ra
-     nop
-
-END(pixman_composite_src_8888_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (r5g6b5)
- * a2 - w
- */
-
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-    li       t4, 0x07e007e0
-    li       t5, 0x001F001F
-1:
-    lhu      t0, 0(a1)
-    lhu      t1, 2(a1)
-    addiu    a1, a1, 4
-    addiu    a2, a2, -2
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
-
-    sw       t2, 0(a0)
-    sw       t3, 4(a0)
-
-    addiu    t2, a2, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    lhu      t0, 0(a1)
-
-    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
-
-    sw       t1, 0(a0)
-3:
-    j        ra
-     nop
-
-END(pixman_composite_src_0565_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (x8r8g8b8)
- * a2 - w
- */
-
-    beqz     a2, 4f
-     nop
-    li       t9, 0xff000000
-    srl      t8, a2, 3    /* t1 = how many multiples of 8 src pixels */
-    beqz     t8, 3f       /* branch if less than 8 src pixels */
-     nop
-1:
-    addiu    t8, t8, -1
-    beqz     t8, 2f
-     addiu   a2, a2, -8
-    pref     0, 32(a1)
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    lw       t2, 8(a1)
-    lw       t3, 12(a1)
-    lw       t4, 16(a1)
-    lw       t5, 20(a1)
-    lw       t6, 24(a1)
-    lw       t7, 28(a1)
-    addiu    a1, a1, 32
-    or       t0, t0, t9
-    or       t1, t1, t9
-    or       t2, t2, t9
-    or       t3, t3, t9
-    or       t4, t4, t9
-    or       t5, t5, t9
-    or       t6, t6, t9
-    or       t7, t7, t9
-    pref     30, 32(a0)
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-    sw       t2, 8(a0)
-    sw       t3, 12(a0)
-    sw       t4, 16(a0)
-    sw       t5, 20(a0)
-    sw       t6, 24(a0)
-    sw       t7, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-2:
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    lw       t2, 8(a1)
-    lw       t3, 12(a1)
-    lw       t4, 16(a1)
-    lw       t5, 20(a1)
-    lw       t6, 24(a1)
-    lw       t7, 28(a1)
-    addiu    a1, a1, 32
-    or       t0, t0, t9
-    or       t1, t1, t9
-    or       t2, t2, t9
-    or       t3, t3, t9
-    or       t4, t4, t9
-    or       t5, t5, t9
-    or       t6, t6, t9
-    or       t7, t7, t9
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-    sw       t2, 8(a0)
-    sw       t3, 12(a0)
-    sw       t4, 16(a0)
-    sw       t5, 20(a0)
-    sw       t6, 24(a0)
-    sw       t7, 28(a0)
-    beqz     a2, 4f
-     addiu   a0, a0, 32
-3:
-    lw       t0, 0(a1)
-    addiu    a1, a1, 4
-    addiu    a2, a2, -1
-    or       t1, t0, t9
-    sw       t1, 0(a0)
-    bnez     a2, 3b
-     addiu   a0, a0, 4
-4:
-    jr       ra
-     nop
-
-END(pixman_composite_src_x888_8888_asm_mips)
-
-#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (b8g8r8)
- * a2 - w
- */
-
-    beqz              a2, 6f
-     nop
-
-    lui               t8, 0xff00;
-    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
-    beqz              t9, 4f      /* branch if less than 4 src pixels */
-     nop
-
-    li                t0, 0x1
-    li                t1, 0x2
-    li                t2, 0x3
-    andi              t3, a1, 0x3
-    beq               t3, t0, 1f
-     nop
-    beq               t3, t1, 2f
-     nop
-    beq               t3, t2, 3f
-     nop
-
-0:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
-    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
-    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
-    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
-    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
-
-    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
-    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
-    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
-    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
-    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
-    or                t4, t4, t8           /* t4 = FF | R1 | G1 | B1 */
-    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
-    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
-    or                t5, t5, t8           /* t5 = FF | R3 | G3 | B3 */
-    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
-    or                t2, t2, t8           /* t5 = FF | R3 | G3 | B3 */
-
-    sw                t4, 0(a0)
-    sw                t3, 4(a0)
-    sw                t5, 8(a0)
-    sw                t2, 12(a0)
-    b                 0b
-     addiu            a0, a0, 16
-
-1:
-    lbu               t6, 0(a1)            /* t6 =  0 |  0 |  0 | R1 */
-    lhu               t7, 1(a1)            /* t7 =  0 |  0 | B1 | G1 */
-    sll               t6, t6, 16           /* t6 =  0 | R1 |  0 | 0  */
-    wsbh              t7, t7               /* t7 =  0 |  0 | G1 | B1 */
-    or                t7, t6, t7           /* t7 =  0 | R1 | G1 | B1 */
-11:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
-    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
-    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
-    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
-    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
-
-    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
-    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
-    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
-    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
-    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
-    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
-    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
-    or                t3, t3, t8           /* t1 = FF | R3 | G3 | B3 */
-    or                t4, t4, t8           /* t3 = FF | R4 | G4 | B4 */
-
-    sw                t7, 0(a0)
-    sw                t0, 4(a0)
-    sw                t3, 8(a0)
-    sw                t4, 12(a0)
-    rotr              t7, t2, 16           /* t7 = xx | R5 | G5 | B5 */
-    b                 11b
-     addiu            a0, a0, 16
-
-2:
-    lhu               t7, 0(a1)            /* t7 =  0 |  0 | G1 | R1 */
-    wsbh              t7, t7               /* t7 =  0 |  0 | R1 | G1 */
-21:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
-    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
-    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
-    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
-    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
-
-    precr_sra.ph.w    t7, t0, 0            /* t7 = R1 | G1 | B1 | R2 */
-    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
-    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
-    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
-    srl               t7, t7, 8            /* t7 =  0 | R1 | G1 | B1 */
-    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
-    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
-    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
-    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
-    or                t3, t3, t8           /* t3 = FF | R4 | G4 | B4 */
-
-    sw                t7, 0(a0)
-    sw                t0, 4(a0)
-    sw                t1, 8(a0)
-    sw                t3, 12(a0)
-    srl               t7, t2, 16           /* t7 =  0 |  0 | R5 | G5 */
-    b                 21b
-     addiu            a0, a0, 16
-
-3:
-    lbu               t7, 0(a1)            /* t7 =  0 |  0 |  0 | R1 */
-31:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
-    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
-    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
-    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
-    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
-
-    precr_sra.ph.w    t7, t0, 0            /* t7 = xx | R1 | G1 | B1 */
-    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
-    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
-    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
-    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
-    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
-    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
-    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
-    or                t4, t4, t8           /* t4 = FF | R4 | G4 | B4 */
-
-    sw                t7, 0(a0)
-    sw                t3, 4(a0)
-    sw                t1, 8(a0)
-    sw                t4, 12(a0)
-    srl               t7, t2, 16           /* t7 =  0 |  0 | xx | R5 */
-    b                 31b
-     addiu            a0, a0, 16
-
-4:
-    beqz              a2, 6f
-     nop
-5:
-    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
-    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
-    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
-    addiu             a1, a1, 3
-
-    sll               t0, t0, 16           /* t2 =  0 | R | 0 | 0 */
-    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
-
-    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
-    or                t2, t2, t0           /* t2 =  0 | R | G | B */
-    or                t2, t2, t8           /* t2 = FF | R | G | B */
-
-    sw                t2, 0(a0)
-    addiu             a2, a2, -1
-    bnez              a2, 5b
-     addiu            a0, a0, 4
-6:
-    j                 ra
-     nop
-
-END(pixman_composite_src_0888_8888_rev_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
-/*
- * a0 - dst (r5g6b5)
- * a1 - src (b8g8r8)
- * a2 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0, v1
-    beqz              a2, 6f
-     nop
-
-    li                t6, 0xf800f800
-    li                t7, 0x07e007e0
-    li                t8, 0x001F001F
-    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
-    beqz              t9, 4f      /* branch if less than 4 src pixels */
-     nop
-
-    li                t0, 0x1
-    li                t1, 0x2
-    li                t2, 0x3
-    andi              t3, a1, 0x3
-    beq               t3, t0, 1f
-     nop
-    beq               t3, t1, 2f
-     nop
-    beq               t3, t2, 3f
-     nop
-
-0:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
-    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
-    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
-    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
-    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
-
-    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
-    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
-    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
-    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
-    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
-    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
-    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
-
-    CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1
-    CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1
-
-    sh                t4, 0(a0)
-    sh                t3, 2(a0)
-    sh                t5, 4(a0)
-    sh                t2, 6(a0)
-    b                 0b
-     addiu            a0, a0, 8
-
-1:
-    lbu               t4, 0(a1)            /* t4 =  0 |  0 |  0 | R1 */
-    lhu               t5, 1(a1)            /* t5 =  0 |  0 | B1 | G1 */
-    sll               t4, t4, 16           /* t4 =  0 | R1 |  0 | 0  */
-    wsbh              t5, t5               /* t5 =  0 |  0 | G1 | B1 */
-    or                t5, t4, t5           /* t5 =  0 | R1 | G1 | B1 */
-11:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
-    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
-    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
-    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
-    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
-
-    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
-    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
-    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
-    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
-    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
-
-    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
-    CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1
-
-    sh                t5, 0(a0)
-    sh                t0, 2(a0)
-    sh                t3, 4(a0)
-    sh                t4, 6(a0)
-    rotr              t5, t2, 16           /* t5 = xx | R5 | G5 | B5 */
-    b                 11b
-     addiu            a0, a0, 8
-
-2:
-    lhu               t5, 0(a1)            /* t5 =  0 |  0 | G1 | R1 */
-    wsbh              t5, t5               /* t5 =  0 |  0 | R1 | G1 */
-21:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
-    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
-    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
-    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
-    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
-
-    precr_sra.ph.w    t5, t0, 0            /* t5 = R1 | G1 | B1 | R2 */
-    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
-    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
-    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
-    srl               t5, t5, 8            /* t5 =  0 | R1 | G1 | B1 */
-    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
-
-    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
-    CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1
-
-    sh                t5, 0(a0)
-    sh                t0, 2(a0)
-    sh                t1, 4(a0)
-    sh                t3, 6(a0)
-    srl               t5, t2, 16           /* t5 =  0 |  0 | R5 | G5 */
-    b                 21b
-     addiu            a0, a0, 8
-
-3:
-    lbu               t5, 0(a1)            /* t5 =  0 |  0 |  0 | R1 */
-31:
-    beqz              t9, 4f
-     addiu            t9, t9, -1
-    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
-    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
-    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
-
-    addiu             a1, a1, 12
-    addiu             a2, a2, -4
-
-    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
-    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
-    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
-
-    precr_sra.ph.w    t5, t0, 0            /* t5 = xx | R1 | G1 | B1 */
-    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
-    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
-    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
-    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
-
-    CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1
-    CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1
-
-    sh                t5, 0(a0)
-    sh                t3, 2(a0)
-    sh                t1, 4(a0)
-    sh                t4, 6(a0)
-    srl               t5, t2, 16           /* t5 =  0 |  0 | xx | R5 */
-    b                 31b
-     addiu            a0, a0, 8
-
-4:
-    beqz              a2, 6f
-     nop
-5:
-    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
-    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
-    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
-    addiu             a1, a1, 3
-
-    sll               t0, t0, 16           /* t2 =  0 | R | 0 | 0 */
-    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
-
-    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
-    or                t2, t2, t0           /* t2 =  0 | R | G | B */
-
-    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
-
-    sh                t3, 0(a0)
-    addiu             a2, a2, -1
-    bnez              a2, 5b
-     addiu            a0, a0, 2
-6:
-    RESTORE_REGS_FROM_STACK 0, v0, v1
-    j                 ra
-     nop
-
-END(pixman_composite_src_0888_0565_rev_asm_mips)
-#endif
-
-LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
-/*
- * a0 - dst  (a8b8g8r8)
- * a1 - src  (a8r8g8b8)
- * a2 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0
-    li       v0, 0x00ff00ff
-
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    addiu    a1, a1, 8
-    addiu    a2, a2, -2
-    srl      t2, t0, 24
-    srl      t3, t1, 24
-
-    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
-
-    sll      t0, t0, 8
-    sll      t1, t1, 8
-    andi     t2, t2, 0xff
-    andi     t3, t3, 0xff
-    or       t0, t0, t2
-    or       t1, t1, t3
-    wsbh     t0, t0
-    wsbh     t1, t1
-    rotr     t0, t0, 16
-    rotr     t1, t1, 16
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-
-    addiu    t2, a2, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    lw       t0, 0(a1)
-    srl      t1, t0, 24
-
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
-
-    sll      t0, t0, 8
-    andi     t1, t1, 0xff
-    or       t0, t0, t1
-    wsbh     t0, t0
-    rotr     t0, t0, 16
-    sw       t0, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j        ra
-     nop
-
-END(pixman_composite_src_pixbuf_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0
-    li       v0, 0x00ff00ff
-
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    addiu    a1, a1, 8
-    addiu    a2, a2, -2
-    srl      t2, t0, 24
-    srl      t3, t1, 24
-
-    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
-
-    sll      t0, t0, 8
-    sll      t1, t1, 8
-    andi     t2, t2, 0xff
-    andi     t3, t3, 0xff
-    or       t0, t0, t2
-    or       t1, t1, t3
-    rotr     t0, t0, 8
-    rotr     t1, t1, 8
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-
-    addiu    t2, a2, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    lw       t0, 0(a1)
-    srl      t1, t0, 24
-
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
-
-    sll      t0, t0, 8
-    andi     t1, t1, 0xff
-    or       t0, t0, t1
-    rotr     t0, t0, 8
-    sw       t0, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j        ra
-     nop
-
-END(pixman_composite_src_rpixbuf_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-
-    SAVE_REGS_ON_STACK 0, v0
-    li       v0, 0x00ff00ff
-
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-
-1:
-                       /* a1 = source      (32bit constant) */
-    lbu      t0, 0(a2) /* t2 = mask        (a8) */
-    lbu      t1, 1(a2) /* t3 = mask        (a8) */
-    addiu    a2, a2, 2
-
-    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
-
-    sw       t2, 0(a0)
-    sw       t3, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t2, a3, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 8
-
-    beqz     a3, 3f
-     nop
-
-2:
-    lbu      t0, 0(a2)
-    addiu    a2, a2, 1
-
-    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
-
-    sw       t1, 0(a0)
-    addiu    a3, a3, -1
-    addiu    a0, a0, 4
-
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j        ra
-     nop
-
-END(pixman_composite_src_n_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    li                t9, 0x00ff00ff
-    beqz              a3, 3f
-     nop
-    srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
-    beqz              t7, 1f      /* branch if less than 4 src pixels */
-     nop
-
-    srl               t8, a1, 24
-    replv.ph          t8, t8
-
-0:
-    beqz              t7, 1f
-     addiu            t7, t7, -1
-    lbu               t0, 0(a2)
-    lbu               t1, 1(a2)
-    lbu               t2, 2(a2)
-    lbu               t3, 3(a2)
-
-    addiu             a2, a2, 4
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr.qb.ph       t0, t3, t1
-
-    muleu_s.ph.qbl    t2, t0, t8
-    muleu_s.ph.qbr    t3, t0, t8
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t2, t2, t3
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a3, a3, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a3, 3f
-     nop
-    srl               t8, a1, 24
-2:
-    lbu               t0, 0(a2)
-    addiu             a2, a2, 1
-
-    mul               t2, t0, t8
-    shra_r.ph         t3, t2, 8
-    andi              t3, t3, 0x00ff
-    addq.ph           t2, t2, t3
-    shra_r.ph         t2, t2, 8
-
-    sb                t2, 0(a0)
-    addiu             a3, a3, -1
-    bnez              a3, 2b
-     addiu            a0, a0, 1
-
-3:
-    j                 ra
-     nop
-
-END(pixman_composite_src_n_8_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8r8g8b8)
- * a3 - w
- */
-
-    beqz         a3, 8f
-     nop
-    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
-
-    li           t6, 0xff
-    addiu        t7, zero, -1 /* t7 = 0xffffffff */
-    srl          t8, a1, 24   /* t8 = srca */
-    li           t9, 0x00ff00ff
-
-    addiu        t1, a3, -1
-    beqz         t1, 4f       /* last pixel */
-     nop
-
-0:
-    lw           t0, 0(a2)    /* t0 = mask */
-    lw           t1, 4(a2)    /* t1 = mask */
-    addiu        a3, a3, -2   /* w = w - 2 */
-    or           t2, t0, t1
-    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
-     addiu       a2, a2, 8
-    and          t2, t0, t1
-    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     nop
-
-//if(ma)
-    lw           t2, 0(a0)    /* t2 = dst */
-    lw           t3, 4(a0)    /* t3 = dst */
-    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
-    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
-    not          t0, t0
-    not          t1, t1
-    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
-    addu_s.qb    t2, t4, t2
-    addu_s.qb    t3, t5, t3
-    sw           t2, 0(a0)
-    sw           t3, 4(a0)
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 8
-    b            4f
-     nop
-1:
-//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
-    beq          t8, t6, 2f   /* if (srca == 0xff) */
-     nop
-    lw           t2, 0(a0)    /* t2 = dst */
-    lw           t3, 4(a0)    /* t3 = dst */
-    not          t0, a1
-    not          t1, a1
-    srl          t0, t0, 24
-    srl          t1, t1, 24
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
-    addu_s.qb    t2, a1, t2
-    addu_s.qb    t3, a1, t3
-    sw           t2, 0(a0)
-    sw           t3, 4(a0)
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 8
-    b            4f
-     nop
-2:
-    sw           a1, 0(a0)
-    sw           a1, 4(a0)
-3:
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 8
-
-4:
-    beqz         a3, 7f
-     nop
-                              /* a1 = src */
-    lw           t0, 0(a2)    /* t0 = mask */
-    beqz         t0, 7f       /* if (t0 == 0) */
-     nop
-    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
-     nop
-//if(ma)
-    lw           t1, 0(a0)    /* t1 = dst */
-    MIPS_UN8x4_MUL_UN8x4  a1, t0, t2, t9, t3, t4, t5, s0
-    MIPS_UN8x4_MUL_UN8    t0, t8, t0, t9, t3, t4, t5
-    not          t0, t0
-    MIPS_UN8x4_MUL_UN8x4  t1, t0, t1, t9, t3, t4, t5, s0
-    addu_s.qb    t1, t2, t1
-    sw           t1, 0(a0)
-    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
-    j            ra
-     nop
-5:
-//if (t0 == 0xffffffff)
-    beq          t8, t6, 6f   /* if (srca == 0xff) */
-     nop
-    lw           t1, 0(a0)    /* t1 = dst */
-    not          t0, a1
-    srl          t0, t0, 24
-    MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
-    addu_s.qb    t1, a1, t1
-    sw           t1, 0(a0)
-    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
-    j            ra
-     nop
-6:
-    sw           a1, 0(a0)
-7:
-    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
-8:
-    j            ra
-     nop
-
-END(pixman_composite_over_n_8888_8888_ca_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (32bit constant)
- * a2 - mask (a8r8g8b8)
- * a3 - w
- */
-
-    beqz         a3, 8f
-     nop
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    li           t6, 0xff
-    addiu        t7, zero, -1 /* t7 = 0xffffffff */
-    srl          t8, a1, 24   /* t8 = srca */
-    li           t9, 0x00ff00ff
-    li           s6, 0xf800f800
-    li           s7, 0x07e007e0
-    li           s8, 0x001F001F
-
-    addiu        t1, a3, -1
-    beqz         t1, 4f       /* last pixel */
-     nop
-
-0:
-    lw           t0, 0(a2)    /* t0 = mask */
-    lw           t1, 4(a2)    /* t1 = mask */
-    addiu        a3, a3, -2   /* w = w - 2 */
-    or           t2, t0, t1
-    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
-     addiu       a2, a2, 8
-    and          t2, t0, t1
-    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     nop
-
-//if(ma)
-    lhu          t2, 0(a0)    /* t2 = dst */
-    lhu          t3, 2(a0)    /* t3 = dst */
-    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
-    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
-    not          t0, t0
-    not          t1, t1
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
-    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
-    addu_s.qb    t2, t4, t2
-    addu_s.qb    t3, t5, t3
-    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
-    sh           t2, 0(a0)
-    sh           t3, 2(a0)
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 4
-    b            4f
-     nop
-1:
-//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
-    beq          t8, t6, 2f   /* if (srca == 0xff) */
-     nop
-    lhu          t2, 0(a0)    /* t2 = dst */
-    lhu          t3, 2(a0)    /* t3 = dst */
-    not          t0, a1
-    not          t1, a1
-    srl          t0, t0, 24
-    srl          t1, t1, 24
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
-    MIPS_2xUN8x4_MUL_2xUN8   t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
-    addu_s.qb    t2, a1, t2
-    addu_s.qb    t3, a1, t3
-    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
-    sh           t2, 0(a0)
-    sh           t3, 2(a0)
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 4
-    b            4f
-     nop
-2:
-    CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
-    sh           t2, 0(a0)
-    sh           t2, 2(a0)
-3:
-    addiu        t1, a3, -1
-    bgtz         t1, 0b
-     addiu       a0, a0, 4
-
-4:
-    beqz         a3, 7f
-     nop
-                              /* a1 = src */
-    lw           t0, 0(a2)    /* t0 = mask */
-    beqz         t0, 7f       /* if (t0 == 0) */
-     nop
-    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
-     nop
-//if(ma)
-    lhu          t1, 0(a0)    /* t1 = dst */
-    MIPS_UN8x4_MUL_UN8x4     a1, t0, t2, t9, t3, t4, t5, s0
-    MIPS_UN8x4_MUL_UN8       t0, t8, t0, t9, t3, t4, t5
-    not          t0, t0
-    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
-    MIPS_UN8x4_MUL_UN8x4     s1, t0, s1, t9, t3, t4, t5, s0
-    addu_s.qb    s1, t2, s1
-    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
-    sh           t1, 0(a0)
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    j            ra
-     nop
-5:
-//if (t0 == 0xffffffff)
-    beq          t8, t6, 6f   /* if (srca == 0xff) */
-     nop
-    lhu          t1, 0(a0)    /* t1 = dst */
-    not          t0, a1
-    srl          t0, t0, 24
-    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
-    MIPS_UN8x4_MUL_UN8       s1, t0, s1, t9, t2, t3, t4
-    addu_s.qb    s1, a1, s1
-    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
-    sh           t1, 0(a0)
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    j            ra
-     nop
-6:
-    CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
-    sh           t1, 0(a0)
-7:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
-8:
-    j            ra
-     nop
-
-END(pixman_composite_over_n_8888_0565_ca_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0
-    li                t9, 0x00ff00ff
-    beqz              a3, 3f
-     nop
-    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
-    beqz              v0, 1f      /* branch if less than 4 src pixels */
-     nop
-
-    srl               t8, a1, 24
-    replv.ph          t8, t8
-
-0:
-    beqz              v0, 1f
-     addiu            v0, v0, -1
-    lbu               t0, 0(a2)
-    lbu               t1, 1(a2)
-    lbu               t2, 2(a2)
-    lbu               t3, 3(a2)
-    lbu               t4, 0(a0)
-    lbu               t5, 1(a0)
-    lbu               t6, 2(a0)
-    lbu               t7, 3(a0)
-
-    addiu             a2, a2, 4
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr_sra.ph.w    t5, t4, 0
-    precr_sra.ph.w    t7, t6, 0
-
-    precr.qb.ph       t0, t3, t1
-    precr.qb.ph       t1, t7, t5
-
-    muleu_s.ph.qbl    t2, t0, t8
-    muleu_s.ph.qbr    t3, t0, t8
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t0, t2, t3
-    not               t6, t0
-
-    preceu.ph.qbl     t7, t6
-    preceu.ph.qbr     t6, t6
-
-    muleu_s.ph.qbl    t2, t1, t7
-    muleu_s.ph.qbr    t3, t1, t6
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t1, t2, t3
-
-    addu_s.qb         t2, t0, t1
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a3, a3, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a3, 3f
-     nop
-    srl               t8, a1, 24
-2:
-    lbu               t0, 0(a2)
-    lbu               t1, 0(a0)
-    addiu             a2, a2, 1
-
-    mul               t2, t0, t8
-    shra_r.ph         t3, t2, 8
-    andi              t3, t3, 0x00ff
-    addq.ph           t2, t2, t3
-    shra_r.ph         t2, t2, 8
-    not               t3, t2
-    andi              t3, t3, 0x00ff
-
-
-    mul               t4, t1, t3
-    shra_r.ph         t5, t4, 8
-    andi              t5, t5, 0x00ff
-    addq.ph           t4, t4, t5
-    shra_r.ph         t4, t4, 8
-    andi              t4, t4, 0x00ff
-
-    addu_s.qb         t2, t2, t4
-    sb                t2, 0(a0)
-    addiu             a3, a3, -1
-    bnez              a3, 2b
-     addiu            a0, a0, 1
-
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j                 ra
-     nop
-
-END(pixman_composite_over_n_8_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
-    beqz      a3, 4f
-     nop
-    li        t4, 0x00ff00ff
-    li        t5, 0xff
-    addiu     t0, a3, -1
-    beqz      t0, 3f         /* last pixel */
-     srl      t6, a1, 24     /* t6 = srca */
-    not       s4, a1
-    beq       t5, t6, 2f     /* if (srca == 0xff) */
-     srl      s4, s4, 24
-1:
-                             /* a1 = src */
-    lbu       t0, 0(a2)      /* t0 = mask */
-    lbu       t1, 1(a2)      /* t1 = mask */
-    or        t2, t0, t1
-    beqz      t2, 111f       /* if (t0 == 0) && (t1 == 0) */
-     addiu    a2, a2, 2
-    and       t3, t0, t1
-
-    lw        t2, 0(a0)      /* t2 = dst */
-    beq       t3, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
-     lw       t3, 4(a0)      /* t3 = dst */
-
-    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
-    not       s2, s0
-    not       s3, s1
-    srl       s2, s2, 24
-    srl       s3, s3, 24
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
-    addu_s.qb s2, t2, s0
-    addu_s.qb s3, t3, s1
-    sw        s2, 0(a0)
-    b         111f
-     sw       s3, 4(a0)
-11:
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
-    addu_s.qb s2, t2, a1
-    addu_s.qb s3, t3, a1
-    sw        s2, 0(a0)
-    sw        s3, 4(a0)
-
-111:
-    addiu     a3, a3, -2
-    addiu     t0, a3, -1
-    bgtz      t0, 1b
-     addiu    a0, a0, 8
-    b         3f
-     nop
-2:
-                             /* a1 = src */
-    lbu       t0, 0(a2)      /* t0 = mask */
-    lbu       t1, 1(a2)      /* t1 = mask */
-    or        t2, t0, t1
-    beqz      t2, 222f       /* if (t0 == 0) && (t1 == 0) */
-     addiu    a2, a2, 2
-    and       t3, t0, t1
-    beq       t3, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
-     nop
-    lw        t2, 0(a0)      /* t2 = dst */
-    lw        t3, 4(a0)      /* t3 = dst */
-
-    OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
-                           t6, t7, t4, t8, t9, s0, s1, s2, s3
-    sw        t6, 0(a0)
-    b         222f
-     sw        t7, 4(a0)
-22:
-    sw        a1, 0(a0)
-    sw        a1, 4(a0)
-222:
-    addiu     a3, a3, -2
-    addiu     t0, a3, -1
-    bgtz      t0, 2b
-     addiu    a0, a0, 8
-3:
-    blez      a3, 4f
-     nop
-                             /* a1 = src */
-    lbu       t0, 0(a2)      /* t0 = mask */
-    beqz      t0, 4f         /* if (t0 == 0) */
-     addiu    a2, a2, 1
-    move      t3, a1
-    beq       t0, t5, 31f    /* if (t0 == 0xff) */
-     lw       t1, 0(a0)      /* t1 = dst */
-
-    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
-31:
-    not       t2, t3
-    srl       t2, t2, 24
-    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
-    addu_s.qb t2, t1, t3
-    sw        t2, 0(a0)
-4:
-    RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
-    j         ra
-     nop
-
-END(pixman_composite_over_n_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    beqz     a3, 4f
-     nop
-    li       t4, 0x00ff00ff
-    li       t5, 0xff
-    li       t6, 0xf800f800
-    li       t7, 0x07e007e0
-    li       t8, 0x001F001F
-    addiu    t1, a3, -1
-    beqz     t1, 3f         /* last pixel */
-     srl     t0, a1, 24     /* t0 = srca */
-    not      v0, a1
-    beq      t0, t5, 2f     /* if (srca == 0xff) */
-     srl     v0, v0, 24
-1:
-                            /* a1 = src */
-    lbu      t0, 0(a2)      /* t0 = mask */
-    lbu      t1, 1(a2)      /* t1 = mask */
-    or       t2, t0, t1
-    beqz     t2, 111f       /* if (t0 == 0) && (t1 == 0) */
-     addiu   a2, a2, 2
-    lhu      t2, 0(a0)      /* t2 = dst */
-    lhu      t3, 2(a0)      /* t3 = dst */
-    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
-    and      t9, t0, t1
-    beq      t9, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
-     nop
-
-    MIPS_2xUN8x4_MUL_2xUN8   a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
-    not      s4, s2
-    not      s5, s3
-    srl      s4, s4, 24
-    srl      s5, s5, 24
-    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
-    addu_s.qb                s4, s2, s0
-    addu_s.qb                s5, s3, s1
-    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
-    sh       t2, 0(a0)
-    b        111f
-     sh      t3, 2(a0)
-11:
-    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
-    addu_s.qb                s4, a1, s0
-    addu_s.qb                s5, a1, s1
-    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
-    sh       t2, 0(a0)
-    sh       t3, 2(a0)
-111:
-    addiu    a3, a3, -2
-    addiu    t0, a3, -1
-    bgtz     t0, 1b
-     addiu   a0, a0, 4
-    b        3f
-     nop
-2:
-    CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
-21:
-                            /* a1 = src */
-    lbu      t0, 0(a2)      /* t0 = mask */
-    lbu      t1, 1(a2)      /* t1 = mask */
-    or       t2, t0, t1
-    beqz     t2, 222f       /* if (t0 == 0) && (t1 == 0) */
-     addiu   a2, a2, 2
-    and      t9, t0, t1
-    move     s2, s0
-    beq      t9, t5, 22f    /* if (t0 == 0xff) && (t2 == 0xff) */
-     move    s3, s0
-    lhu      t2, 0(a0)      /* t2 = dst */
-    lhu      t3, 2(a0)      /* t3 = dst */
-
-    CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
-    OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
-                             t2, t3, t4, t9, s4, s5, s6, s7, s8
-    CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
-22:
-    sh       s2, 0(a0)
-    sh       s3, 2(a0)
-222:
-    addiu    a3, a3, -2
-    addiu    t0, a3, -1
-    bgtz     t0, 21b
-     addiu   a0, a0, 4
-3:
-    blez      a3, 4f
-     nop
-                            /* a1 = src */
-    lbu      t0, 0(a2)      /* t0 = mask */
-    beqz     t0, 4f         /* if (t0 == 0) */
-     nop
-    lhu      t1, 0(a0)      /* t1 = dst */
-    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
-    beq      t0, t5, 31f    /* if (t0 == 0xff) */
-     move    t3, a1
-
-    MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9
-31:
-    not      t6, t3
-    srl      t6, t6, 24
-    MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9
-    addu_s.qb                t1, t2, t3
-    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
-    sh       t2, 0(a0)
-4:
-    RESTORE_REGS_FROM_STACK  24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    j        ra
-     nop
-
-END(pixman_composite_over_n_8_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (32bit constant)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    srl      a2, a2, 24
-    beqz     t1, 2f
-     nop
-
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-
-    OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
-                           t5, t6, t4, t7, t8, t9, t0, t1, s0
-
-    sw       t5, 0(a0)
-    sw       t6, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-
-    OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0
-    j        ra
-     nop
-
-END(pixman_composite_over_8888_n_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (32bit constant)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
-    li       t6, 0x00ff00ff
-    li       t7, 0xf800f800
-    li       t8, 0x07e007e0
-    li       t9, 0x001F001F
-    beqz     a3, 3f
-     nop
-    srl      a2, a2, 24
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
-    lhu      t3, 2(a0) /* t2 = destination (r5g6b5) */
-    addiu    a1, a1, 8
-
-    CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
-    OVER_2x8888_2x8_2x8888   t0, t1, a2, a2, t4, t5, \
-                             t2, t3, t6, t0, t1, s0, s1, s2, s3
-    CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1
-
-    sh       t4, 0(a0)
-    sh       t5, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
-    OVER_8888_8_8888         t0, a2, t2, t1, t6, t3, t4, t5, t7
-    CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
-    j                 ra
-     nop
-
-END(pixman_composite_over_8888_n_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (r5g6b5)
- * a2 - mask (32bit constant)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
-    li       t6, 0x00ff00ff
-    li       t7, 0xf800f800
-    li       t8, 0x07e007e0
-    li       t9, 0x001F001F
-    beqz     a3, 3f
-     nop
-    srl      a2, a2, 24
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
-                       /* a2 = mask        (32bit constant) */
-    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
-    lhu      t3, 2(a0) /* t3 = destination (r5g6b5) */
-    addiu    a1, a1, 4
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
-    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
-    OVER_2x8888_2x8_2x8888   t4, t5, a2, a2, s0, s1, \
-                             t0, t1, t6, s2, s3, s4, s5, t4, t5
-    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-                       /* a2 = mask        (32bit constant) */
-    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
-    CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
-    OVER_8888_8_8888         t2, a2, t3, t0, t6, t1, t4, t5, t7
-    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
-    j        ra
-     nop
-
-END(pixman_composite_over_0565_n_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lbu      t2, 0(a2) /* t2 = mask        (a8) */
-    lbu      t3, 1(a2) /* t3 = mask        (a8) */
-    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
-    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-    addiu    a2, a2, 2
-
-    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
-                           t7, t8, t4, t9, s0, s1, t0, t1, t2
-
-    sw       t7, 0(a0)
-    sw       t8, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lbu      t1, 0(a2) /* t1 = mask        (a8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-
-    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1
-    j        ra
-     nop
-
-END(pixman_composite_over_8888_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
-    li       t6, 0x00ff00ff
-    li       t7, 0xf800f800
-    li       t8, 0x07e007e0
-    li       t9, 0x001F001F
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lbu      t2, 0(a2) /* t2 = mask        (a8) */
-    lbu      t3, 1(a2) /* t3 = mask        (a8) */
-    lhu      t4, 0(a0) /* t4 = destination (r5g6b5) */
-    lhu      t5, 2(a0) /* t5 = destination (r5g6b5) */
-    addiu    a1, a1, 8
-    addiu    a2, a2, 2
-
-    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
-    OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, \
-                             t4, t5, t6, s2, s3, s4, s5, t0, t1
-    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lbu      t1, 0(a2) /* t1 = mask        (a8) */
-    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
-    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
-    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
-    j        ra
-     nop
-
-END(pixman_composite_over_8888_8_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (r5g6b5)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
-    li       t4, 0xf800f800
-    li       t5, 0x07e007e0
-    li       t6, 0x001F001F
-    li       t7, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
-    lbu      t2, 0(a2) /* t2 = mask        (a8) */
-    lbu      t3, 1(a2) /* t3 = mask        (a8) */
-    lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
-    lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
-    addiu    a1, a1, 4
-    addiu    a2, a2, 2
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
-    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
-    OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, \
-                             t0, t1, t7, s4, s5, t8, t9, s0, s1
-    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-    lbu      t1, 0(a2) /* t1 = mask        (a8) */
-    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
-    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
-    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
-    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
-    j        ra
-     nop
-
-END(pixman_composite_over_0565_8_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (a8r8g8b8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
-    lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
-    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
-    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-    addiu    a2, a2, 8
-    srl      t2, t2, 24
-    srl      t3, t3, 24
-
-    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1
-
-    sw       t7, 0(a0)
-    sw       t8, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    srl      t1, t1, 24
-
-    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j        ra
-     nop
-
-END(pixman_composite_over_8888_8888_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li           t4, 0x00ff00ff
-    beqz         a2, 3f
-     nop
-    addiu        t1, a2, -1
-    beqz         t1, 2f
-     nop
-1:
-    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-    addiu        a1, a1, 8
-
-    not          t5, t0
-    srl          t5, t5, 24
-    not          t6, t1
-    srl          t6, t6, 24
-
-    or           t7, t5, t6
-    beqz         t7, 11f
-     or          t8, t0, t1
-    beqz         t8, 12f
-
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3
-
-    addu_s.qb    t0, t7, t0
-    addu_s.qb    t1, t8, t1
-11:
-    sw           t0, 0(a0)
-    sw           t1, 4(a0)
-12:
-    addiu        a2, a2, -2
-    addiu        t1, a2, -1
-    bgtz         t1, 1b
-     addiu       a0, a0, 8
-2:
-    beqz         a2, 3f
-     nop
-
-    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-    addiu        a1, a1, 4
-
-    not          t2, t0
-    srl          t2, t2, 24
-
-    beqz         t2, 21f
-     nop
-    beqz         t0, 3f
-
-    MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7
-
-    addu_s.qb    t0, t3, t0
-21:
-    sw           t0, 0(a0)
-
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j            ra
-     nop
-
-END(pixman_composite_over_8888_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (a8r8g8b8)
- * a2 - w
- */
-
-    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
-    li           t4, 0x00ff00ff
-    li           s3, 0xf800f800
-    li           s4, 0x07e007e0
-    li           s5, 0x001F001F
-    beqz         a2, 3f
-     nop
-    addiu        t1, a2, -1
-    beqz         t1, 2f
-     nop
-1:
-    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lhu          t2, 0(a0) /* t2 = destination (r5g6b5) */
-    lhu          t3, 2(a0) /* t3 = destination (r5g6b5) */
-    addiu        a1, a1, 8
-
-    not          t5, t0
-    srl          t5, t5, 24
-    not          t6, t1
-    srl          t6, t6, 24
-
-    or           t7, t5, t6
-    beqz         t7, 11f
-     or          t8, t0, t1
-    beqz         t8, 12f
-
-    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2
-    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1
-
-    addu_s.qb    t0, t7, t0
-    addu_s.qb    t1, t8, t1
-11:
-    CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3
-    sh           t7, 0(a0)
-    sh           t8, 2(a0)
-12:
-    addiu        a2, a2, -2
-    addiu        t1, a2, -1
-    bgtz         t1, 1b
-     addiu       a0, a0, 4
-2:
-    beqz         a2, 3f
-     nop
-
-    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
-    addiu        a1, a1, 4
-
-    not          t2, t0
-    srl          t2, t2, 24
-
-    beqz         t2, 21f
-     nop
-    beqz         t0, 3f
-
-    CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9
-    MIPS_UN8x4_MUL_UN8       s0, t2, t3, t4, t5, t6, t7
-
-    addu_s.qb    t0, t3, t0
-21:
-    CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9
-    sh           s0, 0(a0)
-
-3:
-    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
-    j            ra
-     nop
-
-END(pixman_composite_over_8888_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (32bit constant)
- * a2 - w
- */
-
-    beqz         a2, 5f
-     nop
-
-    not          t0, a1
-    srl          t0, t0, 24
-    bgtz         t0, 1f
-     nop
-    CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3
-0:
-    sh           t1, 0(a0)
-    addiu        a2, a2, -1
-    bgtz         a2, 0b
-     addiu       a0, a0, 2
-    j            ra
-     nop
-
-1:
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li           t4, 0x00ff00ff
-    li           t5, 0xf800f800
-    li           t6, 0x07e007e0
-    li           t7, 0x001F001F
-    addiu        t1, a2, -1
-    beqz         t1, 3f
-     nop
-2:
-    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
-    lhu          t2, 2(a0) /* t2 = destination (r5g6b5) */
-
-    CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
-    MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
-    addu_s.qb                t1, t1, a1
-    addu_s.qb                t2, t2, a1
-    CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1
-
-    sh           t3, 0(a0)
-    sh           t8, 2(a0)
-
-    addiu        a2, a2, -2
-    addiu        t1, a2, -1
-    bgtz         t1, 2b
-     addiu       a0, a0, 4
-3:
-    beqz         a2, 4f
-     nop
-
-    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
-    MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2
-    addu_s.qb                t1, t1, a1
-    CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1
-
-    sh           t2, 0(a0)
-
-4:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-5:
-    j            ra
-     nop
-
-END(pixman_composite_over_n_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - w
- */
-
-    beqz         a2, 5f
-     nop
-
-    not          t0, a1
-    srl          t0, t0, 24
-    bgtz         t0, 1f
-     nop
-0:
-    sw           a1, 0(a0)
-    addiu        a2, a2, -1
-    bgtz         a2, 0b
-     addiu       a0, a0, 4
-    j            ra
-     nop
-
-1:
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li           t4, 0x00ff00ff
-    addiu        t1, a2, -1
-    beqz         t1, 3f
-     nop
-2:
-    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3
-
-    addu_s.qb    t7, t7, a1
-    addu_s.qb    t8, t8, a1
-
-    sw           t7, 0(a0)
-    sw           t8, 4(a0)
-
-    addiu        a2, a2, -2
-    addiu        t1, a2, -1
-    bgtz         t1, 2b
-     addiu       a0, a0, 8
-3:
-    beqz         a2, 4f
-     nop
-
-    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-
-    MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7
-
-    addu_s.qb    t3, t3, a1
-
-    sw           t3, 0(a0)
-
-4:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-5:
-    j            ra
-     nop
-
-END(pixman_composite_over_n_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (a8)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0, v1
-    li                t9, 0x00ff00ff
-    beqz              a3, 3f
-     nop
-
-    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
-    beqz              v0, 1f      /* branch if less than 4 src pixels */
-     nop
-
-0:
-    beqz              v0, 1f
-     addiu            v0, v0, -1
-    lbu               t0, 0(a2)
-    lbu               t1, 1(a2)
-    lbu               t2, 2(a2)
-    lbu               t3, 3(a2)
-    lbu               t4, 0(a0)
-    lbu               t5, 1(a0)
-    lbu               t6, 2(a0)
-    lbu               t7, 3(a0)
-
-    addiu             a2, a2, 4
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr_sra.ph.w    t5, t4, 0
-    precr_sra.ph.w    t7, t6, 0
-
-    precr.qb.ph       t0, t3, t1
-    precr.qb.ph       t1, t7, t5
-
-    lbu               t4, 0(a1)
-    lbu               v1, 1(a1)
-    lbu               t7, 2(a1)
-    lbu               t8, 3(a1)
-
-    addiu             a1, a1, 4
-
-    precr_sra.ph.w    v1, t4, 0
-    precr_sra.ph.w    t8, t7, 0
-
-    muleu_s.ph.qbl    t2, t0, t8
-    muleu_s.ph.qbr    t3, t0, v1
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t0, t2, t3
-
-    addu_s.qb         t2, t0, t1
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a3, a3, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a3, 3f
-     nop
-2:
-    lbu               t8, 0(a1)
-    lbu               t0, 0(a2)
-    lbu               t1, 0(a0)
-    addiu             a1, a1, 1
-    addiu             a2, a2, 1
-
-    mul               t2, t0, t8
-    shra_r.ph         t3, t2, 8
-    andi              t3, t3, 0xff
-    addq.ph           t2, t2, t3
-    shra_r.ph         t2, t2, 8
-    andi              t2, t2, 0xff
-
-    addu_s.qb         t2, t2, t1
-    sb                t2, 0(a0)
-    addiu             a3, a3, -1
-    bnez              a3, 2b
-     addiu            a0, a0, 1
-
-3:
-    RESTORE_REGS_FROM_STACK 0, v0, v1
-    j                 ra
-     nop
-
-END(pixman_composite_add_8_8_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, v0
-    li                t9, 0x00ff00ff
-    beqz              a3, 3f
-     nop
-
-    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
-    beqz              v0, 1f      /* branch if less than 4 src pixels */
-     nop
-
-    srl               t8, a1, 24
-    replv.ph          t8, t8
-
-0:
-    beqz              v0, 1f
-     addiu            v0, v0, -1
-    lbu               t0, 0(a2)
-    lbu               t1, 1(a2)
-    lbu               t2, 2(a2)
-    lbu               t3, 3(a2)
-    lbu               t4, 0(a0)
-    lbu               t5, 1(a0)
-    lbu               t6, 2(a0)
-    lbu               t7, 3(a0)
-
-    addiu             a2, a2, 4
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr_sra.ph.w    t5, t4, 0
-    precr_sra.ph.w    t7, t6, 0
-
-    precr.qb.ph       t0, t3, t1
-    precr.qb.ph       t1, t7, t5
-
-    muleu_s.ph.qbl    t2, t0, t8
-    muleu_s.ph.qbr    t3, t0, t8
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t0, t2, t3
-
-    addu_s.qb         t2, t0, t1
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a3, a3, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a3, 3f
-     nop
-    srl               t8, a1, 24
-2:
-    lbu               t0, 0(a2)
-    lbu               t1, 0(a0)
-    addiu             a2, a2, 1
-
-    mul               t2, t0, t8
-    shra_r.ph         t3, t2, 8
-    andi              t3, t3, 0xff
-    addq.ph           t2, t2, t3
-    shra_r.ph         t2, t2, 8
-    andi              t2, t2, 0xff
-
-    addu_s.qb         t2, t2, t1
-    sb                t2, 0(a0)
-    addiu             a3, a3, -1
-    bnez              a3, 2b
-     addiu            a0, a0, 1
-
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j                 ra
-     nop
-
-END(pixman_composite_add_n_8_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-                       /* a1 = source      (32bit constant) */
-    lbu      t0, 0(a2) /* t0 = mask        (a8) */
-    lbu      t1, 1(a2) /* t1 = mask        (a8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-    addiu    a2, a2, 2
-
-    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
-                                       t0, t1, \
-                                       t2, t3, \
-                                       t5, t6, \
-                                       t4, t7, t8, t9, s0, s1, s2
-
-    sw       t5, 0(a0)
-    sw       t6, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-                       /* a1 = source      (32bit constant) */
-    lbu      t0, 0(a2) /* t0 = mask        (a8) */
-    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6
-
-    sw       t2, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j        ra
-     nop
-
-END(pixman_composite_add_n_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (r5g6b5)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-    li       t4, 0xf800f800
-    li       t5, 0x07e007e0
-    li       t6, 0x001F001F
-    li       t7, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
-    lbu      t2, 0(a2) /* t2 = mask        (a8) */
-    lbu      t3, 1(a2) /* t3 = mask        (a8) */
-    lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
-    lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
-    addiu    a1, a1, 4
-    addiu    a2, a2, 2
-
-    CONVERT_2x0565_TO_2x8888  t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
-    CONVERT_2x0565_TO_2x8888  t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
-    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4  s0, s1, \
-                                        t2, t3, \
-                                        s2, s3, \
-                                        t0, t1, \
-                                        t7, s4, s5, s6, s7, t8, t9
-    CONVERT_2x8888_TO_2x0565  t0, t1, s0, s1, t4, t5, t6, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
-    lbu      t1, 0(a2) /* t1 = mask        (a8) */
-    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888  t0, t3, t4, t5
-    CONVERT_1x0565_TO_1x8888  t2, t4, t5, t6
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4  t3, t1, t4, t0, t7, t2, t5, t6
-    CONVERT_1x8888_TO_1x0565  t0, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-    j        ra
-     nop
-
-END(pixman_composite_add_0565_8_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (a8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lbu      t2, 0(a2) /* t2 = mask        (a8) */
-    lbu      t3, 1(a2) /* t3 = mask        (a8) */
-    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
-    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-    addiu    a2, a2, 2
-
-    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
-                                       t2, t3, \
-                                       t5, t6, \
-                                       t7, t8, \
-                                       t4, t9, s0, s1, s2, t0, t1
-
-    sw       t7, 0(a0)
-    sw       t8, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lbu      t1, 0(a2) /* t1 = mask        (a8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j        ra
-     nop
-
-END(pixman_composite_add_8888_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (32bit constant)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    srl      a2, a2, 24
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-
-    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
-                                       a2, a2, \
-                                       t2, t3, \
-                                       t5, t6, \
-                                       t4, t7, t8, t9, s0, s1, s2
-
-    sw       t5, 0(a0)
-    sw       t6, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-                       /* a2 = mask        (32bit constant) */
-    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j        ra
-     nop
-
-END(pixman_composite_add_8888_n_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8r8g8b8)
- * a2 - mask (a8r8g8b8)
- * a3 - w
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2
-    li       t4, 0x00ff00ff
-    beqz     a3, 3f
-     nop
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
-    lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
-    lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
-    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
-    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
-    addiu    a1, a1, 8
-    addiu    a2, a2, 8
-    srl      t2, t2, 24
-    srl      t3, t3, 24
-
-    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
-                                       t2, t3, \
-                                       t5, t6, \
-                                       t7, t8, \
-                                       t4, t9, s0, s1, s2, t0, t1
-
-    sw       t7, 0(a0)
-    sw       t8, 4(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a3, 3f
-     nop
-    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
-    lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    srl      t1, t1, 24
-
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
-
-    sw       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
-    j        ra
-     nop
-
-END(pixman_composite_add_8888_8888_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (a8)
- * a2 - w
- */
-
-    beqz              a2, 3f
-     nop
-    srl               t9, a2, 2   /* t9 = how many multiples of 4 dst pixels */
-    beqz              t9, 1f      /* branch if less than 4 src pixels */
-     nop
-
-0:
-    beqz              t9, 1f
-     addiu            t9, t9, -1
-    lbu               t0, 0(a1)
-    lbu               t1, 1(a1)
-    lbu               t2, 2(a1)
-    lbu               t3, 3(a1)
-    lbu               t4, 0(a0)
-    lbu               t5, 1(a0)
-    lbu               t6, 2(a0)
-    lbu               t7, 3(a0)
-
-    addiu             a1, a1, 4
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr_sra.ph.w    t5, t4, 0
-    precr_sra.ph.w    t7, t6, 0
-
-    precr.qb.ph       t0, t3, t1
-    precr.qb.ph       t1, t7, t5
-
-    addu_s.qb         t2, t0, t1
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a2, a2, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a2, 3f
-     nop
-2:
-    lbu               t0, 0(a1)
-    lbu               t1, 0(a0)
-    addiu             a1, a1, 1
-
-    addu_s.qb         t2, t0, t1
-    sb                t2, 0(a0)
-    addiu             a2, a2, -1
-    bnez              a2, 2b
-     addiu            a0, a0, 1
-
-3:
-    j                 ra
-     nop
-
-END(pixman_composite_add_8_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (a8r8g8b8)
- * a2 - w
- */
-
-    beqz         a2, 4f
-     nop
-
-    srl          t9, a2, 2      /* t1 = how many multiples of 4 src pixels */
-    beqz         t9, 3f         /* branch if less than 4 src pixels */
-     nop
-1:
-    addiu        t9, t9, -1
-    beqz         t9, 2f
-     addiu       a2, a2, -4
-
-    lw           t0, 0(a1)
-    lw           t1, 4(a1)
-    lw           t2, 8(a1)
-    lw           t3, 12(a1)
-    lw           t4, 0(a0)
-    lw           t5, 4(a0)
-    lw           t6, 8(a0)
-    lw           t7, 12(a0)
-    addiu        a1, a1, 16
-
-    addu_s.qb    t4, t4, t0
-    addu_s.qb    t5, t5, t1
-    addu_s.qb    t6, t6, t2
-    addu_s.qb    t7, t7, t3
-
-    sw           t4, 0(a0)
-    sw           t5, 4(a0)
-    sw           t6, 8(a0)
-    sw           t7, 12(a0)
-    b            1b
-     addiu       a0, a0, 16
-2:
-    lw           t0, 0(a1)
-    lw           t1, 4(a1)
-    lw           t2, 8(a1)
-    lw           t3, 12(a1)
-    lw           t4, 0(a0)
-    lw           t5, 4(a0)
-    lw           t6, 8(a0)
-    lw           t7, 12(a0)
-    addiu        a1, a1, 16
-
-    addu_s.qb    t4, t4, t0
-    addu_s.qb    t5, t5, t1
-    addu_s.qb    t6, t6, t2
-    addu_s.qb    t7, t7, t3
-
-    sw           t4, 0(a0)
-    sw           t5, 4(a0)
-    sw           t6, 8(a0)
-    sw           t7, 12(a0)
-
-    beqz         a2, 4f
-     addiu       a0, a0, 16
-3:
-    lw           t0, 0(a1)
-    lw           t1, 0(a0)
-    addiu        a1, a1, 4
-    addiu        a2, a2, -1
-    addu_s.qb    t1, t1, t0
-    sw           t1, 0(a0)
-    bnez         a2, 3b
-     addiu       a0, a0, 4
-4:
-    jr           ra
-     nop
-
-END(pixman_composite_add_8888_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
-/*
- * a0 - dst  (r5g6b5)
- * a1 - src  (a8)
- * a2 - w
- */
-
-    beqz     a2, 4f
-     nop
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
-    li       t2, 0xf800f800
-    li       t3, 0x07e007e0
-    li       t4, 0x001F001F
-    li       t5, 0x00ff00ff
-
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    lbu      t0, 0(a1) /* t0 = source      (a8) */
-    lbu      t1, 1(a1) /* t1 = source      (a8) */
-    lhu      t6, 0(a0) /* t6 = destination (r5g6b5) */
-    lhu      t7, 2(a0) /* t7 = destination (r5g6b5) */
-    addiu    a1, a1, 2
-
-    not      t0, t0
-    not      t1, t1
-    andi     t0, 0xff  /* t0 = neg source1 */
-    andi     t1, 0xff  /* t1 = neg source2 */
-    CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
-    MIPS_2xUN8x4_MUL_2xUN8   t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
-    CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1
-
-    sh       t8, 0(a0)
-    sh       t9, 2(a0)
-    addiu    a2, a2, -2
-    addiu    t1, a2, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a2, 3f
-     nop
-    lbu      t0, 0(a1) /* t0 = source      (a8) */
-    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
-
-    not      t0, t0
-    andi     t0, 0xff  /* t0 = neg source */
-    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4
-    MIPS_UN8x4_MUL_UN8        t2, t0, t1, t5, t3, t4, t6
-    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4
-
-    sh       t2, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
-4:
-    j        ra
-     nop
-
-END(pixman_composite_out_reverse_8_0565_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (a8)
- * a2 - w
- */
-
-    beqz     a2, 3f
-     nop
-    li       t4, 0x00ff00ff
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    lbu      t0, 0(a1) /* t0 = source      (a8) */
-    lbu      t1, 1(a1) /* t1 = source      (a8) */
-    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
-    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
-    addiu    a1, a1, 2
-    not      t0, t0
-    not      t1, t1
-    andi     t0, 0xff  /* t0 = neg source */
-    andi     t1, 0xff  /* t1 = neg source */
-
-    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0
-
-    sw       t5, 0(a0)
-    sw       t6, 4(a0)
-    addiu    a2, a2, -2
-    addiu    t1, a2, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    lbu      t0, 0(a1) /* t0 = source      (a8) */
-    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
-    not      t0, t0
-    andi     t0, 0xff  /* t0 = neg source */
-
-    MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6
-
-    sw       t2, 0(a0)
-3:
-    j        ra
-     nop
-
-END(pixman_composite_out_reverse_8_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
-/*
- * a0 - dst  (a8r8g8b8)
- * a1 - src  (32bit constant)
- * a2 - w
- */
-
-    beqz              a2, 5f
-     nop
-
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-    li                t0, 0x00ff00ff
-    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
-    beqz              t9, 2f      /* branch if less than 4 src pixels */
-     nop
-1:
-    beqz              t9, 2f
-     addiu            t9, t9, -1
-
-    lw                t1, 0(a0)
-    lw                t2, 4(a0)
-    lw                t3, 8(a0)
-    lw                t4, 12(a0)
-
-    addiu             a2, a2, -4
-
-    not               t5, t1
-    not               t6, t2
-    not               t7, t3
-    not               t8, t4
-    srl               t5, t5, 24
-    srl               t6, t6, 24
-    srl               t7, t7, 24
-    srl               t8, t8, 24
-    replv.ph          t5, t5
-    replv.ph          t6, t6
-    replv.ph          t7, t7
-    replv.ph          t8, t8
-    muleu_s.ph.qbl    s0, a1, t5
-    muleu_s.ph.qbr    s1, a1, t5
-    muleu_s.ph.qbl    s2, a1, t6
-    muleu_s.ph.qbr    s3, a1, t6
-    muleu_s.ph.qbl    s4, a1, t7
-    muleu_s.ph.qbr    s5, a1, t7
-    muleu_s.ph.qbl    s6, a1, t8
-    muleu_s.ph.qbr    s7, a1, t8
-
-    shra_r.ph         t5, s0, 8
-    shra_r.ph         t6, s1, 8
-    shra_r.ph         t7, s2, 8
-    shra_r.ph         t8, s3, 8
-    and               t5, t5, t0
-    and               t6, t6, t0
-    and               t7, t7, t0
-    and               t8, t8, t0
-    addq.ph           s0, s0, t5
-    addq.ph           s1, s1, t6
-    addq.ph           s2, s2, t7
-    addq.ph           s3, s3, t8
-    shra_r.ph         s0, s0, 8
-    shra_r.ph         s1, s1, 8
-    shra_r.ph         s2, s2, 8
-    shra_r.ph         s3, s3, 8
-    shra_r.ph         t5, s4, 8
-    shra_r.ph         t6, s5, 8
-    shra_r.ph         t7, s6, 8
-    shra_r.ph         t8, s7, 8
-    and               t5, t5, t0
-    and               t6, t6, t0
-    and               t7, t7, t0
-    and               t8, t8, t0
-    addq.ph           s4, s4, t5
-    addq.ph           s5, s5, t6
-    addq.ph           s6, s6, t7
-    addq.ph           s7, s7, t8
-    shra_r.ph         s4, s4, 8
-    shra_r.ph         s5, s5, 8
-    shra_r.ph         s6, s6, 8
-    shra_r.ph         s7, s7, 8
-
-    precr.qb.ph       t5, s0, s1
-    precr.qb.ph       t6, s2, s3
-    precr.qb.ph       t7, s4, s5
-    precr.qb.ph       t8, s6, s7
-    addu_s.qb         t5, t1, t5
-    addu_s.qb         t6, t2, t6
-    addu_s.qb         t7, t3, t7
-    addu_s.qb         t8, t4, t8
-
-    sw                t5, 0(a0)
-    sw                t6, 4(a0)
-    sw                t7, 8(a0)
-    sw                t8, 12(a0)
-    b                 1b
-     addiu            a0, a0, 16
-
-2:
-    beqz              a2, 4f
-     nop
-3:
-    lw                t1, 0(a0)
-
-    not               t2, t1
-    srl               t2, t2, 24
-    replv.ph          t2, t2
-
-    muleu_s.ph.qbl    t4, a1, t2
-    muleu_s.ph.qbr    t5, a1, t2
-    shra_r.ph         t6, t4, 8
-    shra_r.ph         t7, t5, 8
-
-    and               t6,t6,t0
-    and               t7,t7,t0
-
-    addq.ph           t8, t4, t6
-    addq.ph           t9, t5, t7
-
-    shra_r.ph         t8, t8, 8
-    shra_r.ph         t9, t9, 8
-
-    precr.qb.ph       t9, t8, t9
-
-    addu_s.qb         t9, t1, t9
-    sw                t9, 0(a0)
-
-    addiu             a2, a2, -1
-    bnez              a2, 3b
-     addiu            a0, a0, 4
-4:
-    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-5:
-    j                 ra
-     nop
-
-END(pixman_composite_over_reverse_n_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
-/*
- * a0 - dst  (a8)
- * a1 - src  (32bit constant)
- * a2 - w
- */
-
-    li                t9, 0x00ff00ff
-    beqz              a2, 3f
-     nop
-    srl               t7, a2, 2   /* t7 = how many multiples of 4 dst pixels */
-    beqz              t7, 1f      /* branch if less than 4 src pixels */
-     nop
-
-    srl               t8, a1, 24
-    replv.ph          t8, t8
-
-0:
-    beqz              t7, 1f
-     addiu            t7, t7, -1
-    lbu               t0, 0(a0)
-    lbu               t1, 1(a0)
-    lbu               t2, 2(a0)
-    lbu               t3, 3(a0)
-
-    precr_sra.ph.w    t1, t0, 0
-    precr_sra.ph.w    t3, t2, 0
-    precr.qb.ph       t0, t3, t1
-
-    muleu_s.ph.qbl    t2, t0, t8
-    muleu_s.ph.qbr    t3, t0, t8
-    shra_r.ph         t4, t2, 8
-    shra_r.ph         t5, t3, 8
-    and               t4, t4, t9
-    and               t5, t5, t9
-    addq.ph           t2, t2, t4
-    addq.ph           t3, t3, t5
-    shra_r.ph         t2, t2, 8
-    shra_r.ph         t3, t3, 8
-    precr.qb.ph       t2, t2, t3
-
-    sb                t2, 0(a0)
-    srl               t2, t2, 8
-    sb                t2, 1(a0)
-    srl               t2, t2, 8
-    sb                t2, 2(a0)
-    srl               t2, t2, 8
-    sb                t2, 3(a0)
-    addiu             a2, a2, -4
-    b                 0b
-     addiu            a0, a0, 4
-
-1:
-    beqz              a2, 3f
-     nop
-    srl               t8, a1, 24
-2:
-    lbu               t0, 0(a0)
-
-    mul               t2, t0, t8
-    shra_r.ph         t3, t2, 8
-    andi              t3, t3, 0x00ff
-    addq.ph           t2, t2, t3
-    shra_r.ph         t2, t2, 8
-
-    sb                t2, 0(a0)
-    addiu             a2, a2, -1
-    bnez              a2, 2b
-     addiu            a0, a0, 1
-
-3:
-    j                 ra
-     nop
-
-END(pixman_composite_in_n_8_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
-/*
- * a0     - dst  (a8r8g8b8)
- * a1     - src  (a8r8g8b8)
- * a2     - w
- * a3     - vx
- * 16(sp) - unit_x
- */
-
-    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
-    lw       t8, 16(sp) /* t8 = unit_x */
-    li       t6, 0x00ff00ff
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-
-    sra      t1, a3, 16 /* t0 = vx >> 16 */
-    sll      t1, t1, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t1, a1, t1
-    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-
-    lw       t2, 0(a0)  /* t2 = destination (a8r8g8b8) */
-    lw       t3, 4(a0)  /* t3 = destination (a8r8g8b8) */
-
-    OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3
-
-    sw       t4, 0(a0)
-    sw       t5, 4(a0)
-    addiu    a2, a2, -2
-    addiu    t1, a2, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    lw       t1, 0(a0)  /* t1 = destination (a8r8g8b8) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-
-    OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7
-
-    sw       t2, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
-    j        ra
-     nop
-
-END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
-/*
- * a0     - dst  (r5g6b5)
- * a1     - src  (a8r8g8b8)
- * a2     - w
- * a3     - vx
- * 16(sp) - unit_x
- */
-
-    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
-    lw       t8, 40(sp) /* t8 = unit_x */
-    li       t4, 0x00ff00ff
-    li       t5, 0xf800f800
-    li       t6, 0x07e007e0
-    li       t7, 0x001F001F
-    beqz     a2, 3f
-     nop
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-1:
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-    sra      t1, a3, 16 /* t0 = vx >> 16 */
-    sll      t1, t1, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t1, a1, t1
-    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
-    lhu      t3, 2(a0)  /* t3 = destination (r5g6b5) */
-
-    CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
-    OVER_2x8888_2x8888       t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
-    CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2
-
-    sh       v0, 0(a0)
-    sh       v1, 2(a0)
-    addiu    a2, a2, -2
-    addiu    t1, a2, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a2, 3f
-     nop
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    lhu      t1, 0(a0)  /* t1 = destination (r5g6b5) */
-    addu     a3, a3, t8 /* a3 = vx + unit_x */
-
-    CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6
-    OVER_8888_8888           t0, t2, t1, t4, t3, t5, t6, t7
-    CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6
-
-    sh       t2, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
-    j        ra
-     nop
-
-END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
-/*
- * a0     - dst (a8r8g8b8)
- * a1     - src (r5g6b5)
- * a2     - w
- * a3     - vx
- * 16(sp) - unit_x
- */
-
-    SAVE_REGS_ON_STACK 0, v0
-    beqz     a2, 3f
-     nop
-
-    lw       v0, 16(sp) /* v0 = unit_x */
-    addiu    t1, a2, -1
-    beqz     t1, 2f
-     nop
-
-    li       t4, 0x07e007e0
-    li       t5, 0x001F001F
-1:
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
-    addu     t0, a1, t0
-    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
-    addu     a3, a3, v0 /* a3 = vx + unit_x */
-    sra      t1, a3, 16 /* t1 = vx >> 16 */
-    sll      t1, t1, 1  /* t1 = t1 * 2 ((r5g6b5)) */
-    addu     t1, a1, t1
-    lhu      t1, 0(t1)  /* t1 = source ((r5g6b5)) */
-    addu     a3, a3, v0 /* a3 = vx + unit_x */
-    addiu    a2, a2, -2
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
-
-    sw       t2, 0(a0)
-    sw       t3, 4(a0)
-
-    addiu    t2, a2, -1
-    bgtz     t2, 1b
-     addiu   a0, a0, 8
-2:
-    beqz     a2, 3f
-     nop
-    sra      t0, a3, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
-    addu     t0, a1, t0
-    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
-
-    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
-
-    sw       t1, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 0, v0
-    j        ra
-     nop
-
-END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
-/*
- * a0     - dst  (r5g6b5)
- * a1     - src  (a8r8g8b8)
- * a2     - mask (a8)
- * a3     - w
- * 16(sp) - vx
- * 20(sp) - unit_x
- */
-    beqz     a3, 4f
-     nop
-
-    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
-    lw       v0, 36(sp) /* v0 = vx */
-    lw       v1, 40(sp) /* v1 = unit_x */
-    li       t6, 0x00ff00ff
-    li       t7, 0xf800f800
-    li       t8, 0x07e007e0
-    li       t9, 0x001F001F
-
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    sra      t0, v0, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    addu     v0, v0, v1 /* v0 = vx + unit_x */
-    sra      t1, v0, 16 /* t1 = vx >> 16 */
-    sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
-    addu     t1, a1, t1
-    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
-    addu     v0, v0, v1 /* v0 = vx + unit_x */
-    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
-    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
-    lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
-    lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
-    addiu    a2, a2, 2
-
-    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
-    OVER_2x8888_2x8_2x8888   t0, t1, \
-                             t2, t3, \
-                             s0, s1, \
-                             t4, t5, \
-                             t6, s2, s3, s4, s5, t2, t3
-    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    sra      t0, v0, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
-    addu     t0, a1, t0
-    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
-    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
-    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
-    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
-    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
-4:
-    j        ra
-     nop
-
-END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
-/*
- * a0     - dst  (r5g6b5)
- * a1     - src  (r5g6b5)
- * a2     - mask (a8)
- * a3     - w
- * 16(sp) - vx
- * 20(sp) - unit_x
- */
-
-    beqz     a3, 4f
-     nop
-    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
-    lw       v0, 36(sp) /* v0 = vx */
-    lw       v1, 40(sp) /* v1 = unit_x */
-    li       t4, 0xf800f800
-    li       t5, 0x07e007e0
-    li       t6, 0x001F001F
-    li       t7, 0x00ff00ff
-
-    addiu    t1, a3, -1
-    beqz     t1, 2f
-     nop
-1:
-    sra      t0, v0, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
-    addu     t0, a1, t0
-    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
-    addu     v0, v0, v1 /* v0 = vx + unit_x */
-    sra      t1, v0, 16 /* t1 = vx >> 16 */
-    sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
-    addu     t1, a1, t1
-    lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
-    addu     v0, v0, v1 /* v0 = vx + unit_x */
-    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
-    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
-    lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
-    lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
-    addiu    a2, a2, 2
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
-    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
-    OVER_2x8888_2x8_2x8888   s0, s1, \
-                             t2, t3, \
-                             s2, s3, \
-                             t0, t1, \
-                             t7, t8, t9, s4, s5, s0, s1
-    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
-
-    sh       s0, 0(a0)
-    sh       s1, 2(a0)
-    addiu    a3, a3, -2
-    addiu    t1, a3, -1
-    bgtz     t1, 1b
-     addiu   a0, a0, 4
-2:
-    beqz     a3, 3f
-     nop
-    sra      t0, v0, 16 /* t0 = vx >> 16 */
-    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
-    addu     t0, a1, t0
-
-    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
-    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
-    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
-
-    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
-    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
-    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
-    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
-
-    sh       t3, 0(a0)
-3:
-    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
-4:
-    j        ra
-     nop
-
-END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz     a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-
-    lw       s0, 36(sp)     /* s0 = wt */
-    lw       s1, 40(sp)     /* s1 = wb */
-    lw       s2, 44(sp)     /* s2 = vx */
-    lw       s3, 48(sp)     /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a1)     /* t0 = tl */
-    lwx      t1, t8(a1)     /* t1 = tr */
-    addiu    a3, a3, -1
-    lwx      t2, t9(a2)     /* t2 = bl */
-    lwx      t3, t8(a2)     /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-
-    addu     s2, s2, s3     /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     a3, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz     a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-
-    lw       s0, 36(sp)     /* s0 = wt */
-    lw       s1, 40(sp)     /* s1 = wb */
-    lw       s2, 44(sp)     /* s2 = vx */
-    lw       s3, 48(sp)     /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a1)     /* t0 = tl */
-    lwx      t1, t8(a1)     /* t1 = tr */
-    addiu    a3, a3, -1
-    lwx      t2, t9(a2)     /* t2 = bl */
-    lwx      t3, t8(a2)     /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
-
-    addu     s2, s2, s3     /* vx += unit_x; */
-    sh       t1, 0(a0)
-    bnez     a3, 0b
-     addiu   a0, a0, 2
-
-    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz     a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 44(sp)     /* s0 = wt */
-    lw       s1, 48(sp)     /* s1 = wb */
-    lw       s2, 52(sp)     /* s2 = vx */
-    lw       s3, 56(sp)     /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       v1, 0x07e007e0
-    li       s8, 0x001f001f
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 1
-    addiu    t8, t9, 2
-    lhx      t0, t9(a1)     /* t0 = tl */
-    lhx      t1, t8(a1)     /* t1 = tr */
-    andi     t1, t1, 0xffff
-    addiu    a3, a3, -1
-    lhx      t2, t9(a2)     /* t2 = bl */
-    lhx      t3, t8(a2)     /* t3 = br */
-    andi     t3, t3, 0xffff
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-
-    addu     s2, s2, s3     /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     a3, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz     a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 44(sp)     /* s0 = wt */
-    lw       s1, 48(sp)     /* s1 = wb */
-    lw       s2, 52(sp)     /* s2 = vx */
-    lw       s3, 56(sp)     /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       v1, 0x07e007e0
-    li       s8, 0x001f001f
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 1
-    addiu    t8, t9, 2
-    lhx      t0, t9(a1)     /* t0 = tl */
-    lhx      t1, t8(a1)     /* t1 = tr */
-    andi     t1, t1, 0xffff
-    addiu    a3, a3, -1
-    lhx      t2, t9(a2)     /* t2 = bl */
-    lhx      t3, t8(a2)     /* t3 = br */
-    andi     t3, t3, 0xffff
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
-
-    addu     s2, s2, s3     /* vx += unit_x; */
-    sh       t1, 0(a0)
-    bnez     a3, 0b
-     addiu   a0, a0, 2
-
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz     a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 40(sp)     /* s0 = wt */
-    lw       s1, 44(sp)     /* s1 = wb */
-    lw       s2, 48(sp)     /* s2 = vx */
-    lw       s3, 52(sp)     /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       s8, 0x00ff00ff
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a1)     /* t0 = tl */
-    lwx      t1, t8(a1)     /* t1 = tr */
-    addiu    a3, a3, -1
-    lwx      t2, t9(a2)     /* t2 = bl */
-    lwx      t3, t8(a2)     /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lw       t1, 0(a0)      /* t1 = dest */
-    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
-
-    addu     s2, s2, s3     /* vx += unit_x; */
-    sw       t2, 0(a0)
-    bnez     a3, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
-/*
- * a0     - *dst
- * a1     - *src_top
- * a2     - *src_bottom
- * a3     - w
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- */
-
-    beqz         a3, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-
-    lw           s0, 36(sp)     /* s0 = wt */
-    lw           s1, 40(sp)     /* s1 = wb */
-    lw           s2, 44(sp)     /* s2 = vx */
-    lw           s3, 48(sp)     /* s3 = unit_x */
-    li           v0, BILINEAR_INTERPOLATION_RANGE
-
-    sll          s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll          s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi         t4, s2, 0xffff /* t4 = (short)vx */
-    srl          t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu         t5, v0, t4     /* t5 = ( 256 - (vx>>8)) */
-
-    mul          s4, s0, t5     /* s4 = wt*(256-(vx>>8)) */
-    mul          s5, s0, t4     /* s5 = wt*(vx>>8) */
-    mul          s6, s1, t5     /* s6 = wb*(256-(vx>>8)) */
-    mul          s7, s1, t4     /* s7 = wb*(vx>>8) */
-
-    sra          t9, s2, 16
-    sll          t9, t9, 2
-    addiu        t8, t9, 4
-    lwx          t0, t9(a1)     /* t0 = tl */
-    lwx          t1, t8(a1)     /* t1 = tr */
-    addiu        a3, a3, -1
-    lwx          t2, t9(a2)     /* t2 = bl */
-    lwx          t3, t8(a2)     /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lw           t1, 0(a0)
-    addu_s.qb    t2, t0, t1
-
-    addu         s2, s2, s3     /* vx += unit_x; */
-    sw           t2, 0(a0)
-    bnez         a3, 0b
-     addiu       a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
-1:
-    j            ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *mask
- * a2     - *src_top
- * a3     - *src_bottom
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    lw       v1, 32(sp)
-    beqz     v1, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 44(sp)        /* s0 = wt */
-    lw       s1, 48(sp)        /* s1 = wb */
-    lw       s2, 52(sp)        /* s2 = vx */
-    lw       s3, 56(sp)        /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       s8, 0x00ff00ff
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a2)        /* t0 = tl */
-    lwx      t1, t8(a2)        /* t1 = tr */
-    addiu    v1, v1, -1
-    lwx      t2, t9(a3)        /* t2 = bl */
-    lwx      t3, t8(a3)        /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    addiu    a1, a1, 1
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     v1, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *mask
- * a2     - *src_top
- * a3     - *src_bottom
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    lw       v1, 32(sp)
-    beqz     v1, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 44(sp)        /* s0 = wt */
-    lw       s1, 48(sp)        /* s1 = wb */
-    lw       s2, 52(sp)        /* s2 = vx */
-    lw       s3, 56(sp)        /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       s8, 0x00ff00ff
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a2)        /* t0 = tl */
-    lwx      t1, t8(a2)        /* t1 = tr */
-    addiu    v1, v1, -1
-    lwx      t2, t9(a3)        /* t2 = bl */
-    lwx      t3, t8(a3)        /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    addiu    a1, a1, 1
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
-    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sh       t1, 0(a0)
-    bnez     v1, 0b
-     addiu   a0, a0, 2
-
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *mask
- * a2     - *src_top
- * a3     - *src_bottom
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    lw       t0, 32(sp)
-    beqz     t0, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
-
-    lw       s0, 48(sp)        /* s0 = wt */
-    lw       s1, 52(sp)        /* s1 = wb */
-    lw       s2, 56(sp)        /* s2 = vx */
-    lw       s3, 60(sp)        /* s3 = unit_x */
-    lw       ra, 64(sp)        /* ra = w */
-    li       v0, 0x00ff00ff
-    li       v1, 0x07e007e0
-    li       s8, 0x001f001f
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    li       t5, BILINEAR_INTERPOLATION_RANGE
-    subu     t5, t5, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 1
-    addiu    t8, t9, 2
-    lhx      t0, t9(a2)        /* t0 = tl */
-    lhx      t1, t8(a2)        /* t1 = tr */
-    andi     t1, t1, 0xffff
-    addiu    ra, ra, -1
-    lhx      t2, t9(a3)        /* t2 = bl */
-    lhx      t3, t8(a3)        /* t3 = br */
-    andi     t3, t3, 0xffff
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    addiu    a1, a1, 1
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     ra, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
-/*
- * a0     - *dst
- * a1     - *mask
- * a2     - *src_top
- * a3     - *src_bottom
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    lw       t0, 32(sp)
-    beqz     t0, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
-
-    lw       s0, 48(sp)        /* s0 = wt */
-    lw       s1, 52(sp)        /* s1 = wb */
-    lw       s2, 56(sp)        /* s2 = vx */
-    lw       s3, 60(sp)        /* s3 = unit_x */
-    lw       ra, 64(sp)        /* ra = w */
-    li       v0, 0x00ff00ff
-    li       v1, 0x07e007e0
-    li       s8, 0x001f001f
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    li       t5, BILINEAR_INTERPOLATION_RANGE
-    subu     t5, t5, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 1
-    addiu    t8, t9, 2
-    lhx      t0, t9(a2)        /* t0 = tl */
-    lhx      t1, t8(a2)        /* t1 = tr */
-    andi     t1, t1, 0xffff
-    addiu    ra, ra, -1
-    lhx      t2, t9(a3)        /* t2 = bl */
-    lhx      t3, t8(a3)        /* t3 = br */
-    andi     t3, t3, 0xffff
-
-    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
-    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    addiu    a1, a1, 1
-    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
-    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sh       t1, 0(a0)
-    bnez     ra, 0b
-     addiu   a0, a0, 2
-
-    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
-/*
- * a0     - dst        (a8r8g8b8)
- * a1     - mask       (a8)
- * a2     - src_top    (a8r8g8b8)
- * a3     - src_bottom (a8r8g8b8)
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       v1, 60(sp)        /* v1 = w(sp + 32 + 28 save regs stack offset)*/
-    beqz     v1, 1f
-     nop
-
-    lw       s0, 44(sp)        /* s0 = wt */
-    lw       s1, 48(sp)        /* s1 = wb */
-    lw       s2, 52(sp)        /* s2 = vx */
-    lw       s3, 56(sp)        /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       s8, 0x00ff00ff
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a2)        /* t0 = tl */
-    lwx      t1, t8(a2)        /* t1 = tr */
-    addiu    v1, v1, -1
-    lwx      t2, t9(a3)        /* t2 = bl */
-    lwx      t3, t8(a3)        /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \
-                                      t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    lw       t2, 0(a0)         /* t2 = dst */
-    addiu    a1, a1, 1
-    OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     v1, 0b
-     addiu   a0, a0, 4
-
-1:
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
-/*
- * a0     - *dst
- * a1     - *mask
- * a2     - *src_top
- * a3     - *src_bottom
- * 16(sp) - wt
- * 20(sp) - wb
- * 24(sp) - vx
- * 28(sp) - unit_x
- * 32(sp) - w
- */
-
-    lw       v1, 32(sp)
-    beqz     v1, 1f
-     nop
-
-    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-
-    lw       s0, 44(sp)        /* s0 = wt */
-    lw       s1, 48(sp)        /* s1 = wb */
-    lw       s2, 52(sp)        /* s2 = vx */
-    lw       s3, 56(sp)        /* s3 = unit_x */
-    li       v0, BILINEAR_INTERPOLATION_RANGE
-    li       s8, 0x00ff00ff
-
-    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
-0:
-    andi     t4, s2, 0xffff    /* t4 = (short)vx */
-    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
-    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)) */
-
-    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
-    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
-    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
-    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
-
-    sra      t9, s2, 16
-    sll      t9, t9, 2
-    addiu    t8, t9, 4
-    lwx      t0, t9(a2)        /* t0 = tl */
-    lwx      t1, t8(a2)        /* t1 = tr */
-    addiu    v1, v1, -1
-    lwx      t2, t9(a3)        /* t2 = bl */
-    lwx      t3, t8(a3)        /* t3 = br */
-
-    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
-    lbu      t1, 0(a1)         /* t1 = mask */
-    lw       t2, 0(a0)         /* t2 = dst */
-    addiu    a1, a1, 1
-    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5
-
-    addu     s2, s2, s3        /* vx += unit_x; */
-    sw       t0, 0(a0)
-    bnez     v1, 0b
-     addiu   a0, a0, 4
-
-    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
-1:
-    j        ra
-     nop
-
-END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
diff --git a/vendor/pixman/pixman/pixman-mips-dspr2-asm.h b/vendor/pixman/pixman/pixman-mips-dspr2-asm.h
deleted file mode 100644
index e23856619..000000000
--- a/vendor/pixman/pixman/pixman-mips-dspr2-asm.h
+++ /dev/null
@@ -1,711 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
- */
-
-#ifndef PIXMAN_MIPS_DSPR2_ASM_H
-#define PIXMAN_MIPS_DSPR2_ASM_H
-
-#define zero $0
-#define AT   $1
-#define v0   $2
-#define v1   $3
-#define a0   $4
-#define a1   $5
-#define a2   $6
-#define a3   $7
-#define t0   $8
-#define t1   $9
-#define t2   $10
-#define t3   $11
-#define t4   $12
-#define t5   $13
-#define t6   $14
-#define t7   $15
-#define s0   $16
-#define s1   $17
-#define s2   $18
-#define s3   $19
-#define s4   $20
-#define s5   $21
-#define s6   $22
-#define s7   $23
-#define t8   $24
-#define t9   $25
-#define k0   $26
-#define k1   $27
-#define gp   $28
-#define sp   $29
-#define fp   $30
-#define s8   $30
-#define ra   $31
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
- */
-#define LEAF_MIPS32R2(symbol)                           \
-                .globl  symbol;                         \
-                .align  2;                              \
-                .hidden symbol;                         \
-                .type   symbol, @function;              \
-                .ent    symbol, 0;                      \
-symbol:         .frame  sp, 0, ra;                      \
-                .set    push;                           \
-                .set    arch=mips32r2;                  \
-                .set    noreorder;                      \
-                .set    noat;
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2
- */
-#define LEAF_MIPS_DSPR2(symbol)                         \
-LEAF_MIPS32R2(symbol)                                   \
-                .set    dspr2;
-
-/*
- * END - mark end of function
- */
-#define END(function)                                   \
-                .set    pop;                            \
-                .end    function;                       \
-                .size   function,.-function
-
-/*
- * Checks if stack offset is big enough for storing/restoring regs_num
- * number of register to/from stack. Stack offset must be greater than
- * or equal to the number of bytes needed for storing registers (regs_num*4).
- * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
- * preserved for input arguments of the functions, already stored in a0-a3),
- * stack size can be further optimized by utilizing this space.
- */
-.macro CHECK_STACK_OFFSET regs_num, stack_offset
-.if \stack_offset < \regs_num * 4 - 16
-.error "Stack offset too small."
-.endif
-.endm
-
-/*
- * Saves set of registers on stack. Maximum number of registers that
- * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * before registers are pushed in order to provide enough space on stack
- * (offset must be multiple of 4, and must be big enough, as described by
- * CHECK_STACK_OFFSET macro). This macro is intended to be used in
- * combination with RESTORE_REGS_FROM_STACK macro. Example:
- *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
- *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
-                          r2  = 0, r3  = 0, r4  = 0, \
-                          r5  = 0, r6  = 0, r7  = 0, \
-                          r8  = 0, r9  = 0, r10 = 0, \
-                          r11 = 0, r12 = 0, r13 = 0, \
-                          r14 = 0
-    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
-    .error "Stack offset must be pozitive and multiple of 4."
-    .endif
-    .if \stack_offset != 0
-    addiu           sp, sp, -\stack_offset
-    .endif
-    sw              \r1, 0(sp)
-    .if \r2 != 0
-    sw              \r2, 4(sp)
-    .endif
-    .if \r3 != 0
-    sw              \r3, 8(sp)
-    .endif
-    .if \r4 != 0
-    sw              \r4, 12(sp)
-    .endif
-    .if \r5 != 0
-    CHECK_STACK_OFFSET 5, \stack_offset
-    sw              \r5, 16(sp)
-    .endif
-    .if \r6 != 0
-    CHECK_STACK_OFFSET 6, \stack_offset
-    sw              \r6, 20(sp)
-    .endif
-    .if \r7 != 0
-    CHECK_STACK_OFFSET 7, \stack_offset
-    sw              \r7, 24(sp)
-    .endif
-    .if \r8 != 0
-    CHECK_STACK_OFFSET 8, \stack_offset
-    sw              \r8, 28(sp)
-    .endif
-    .if \r9 != 0
-    CHECK_STACK_OFFSET 9, \stack_offset
-    sw              \r9, 32(sp)
-    .endif
-    .if \r10 != 0
-    CHECK_STACK_OFFSET 10, \stack_offset
-    sw              \r10, 36(sp)
-    .endif
-    .if \r11 != 0
-    CHECK_STACK_OFFSET 11, \stack_offset
-    sw              \r11, 40(sp)
-    .endif
-    .if \r12 != 0
-    CHECK_STACK_OFFSET 12, \stack_offset
-    sw              \r12, 44(sp)
-    .endif
-    .if \r13 != 0
-    CHECK_STACK_OFFSET 13, \stack_offset
-    sw              \r13, 48(sp)
-    .endif
-    .if \r14 != 0
-    CHECK_STACK_OFFSET 14, \stack_offset
-    sw              \r14, 52(sp)
-    .endif
-.endm
-
-/*
- * Restores set of registers from stack. Maximum number of registers that
- * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * after registers are restored (offset must be multiple of 4, and must
- * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
- * intended to be used in combination with RESTORE_REGS_FROM_STACK macro.
- * Example:
- *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
- *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
-                               r2  = 0, r3  = 0, r4  = 0, \
-                               r5  = 0, r6  = 0, r7  = 0, \
-                               r8  = 0, r9  = 0, r10 = 0, \
-                               r11 = 0, r12 = 0, r13 = 0, \
-                               r14 = 0
-    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
-    .error "Stack offset must be pozitive and multiple of 4."
-    .endif
-    lw              \r1, 0(sp)
-    .if \r2 != 0
-    lw              \r2, 4(sp)
-    .endif
-    .if \r3 != 0
-    lw              \r3, 8(sp)
-    .endif
-    .if \r4 != 0
-    lw              \r4, 12(sp)
-    .endif
-    .if \r5 != 0
-    CHECK_STACK_OFFSET 5, \stack_offset
-    lw              \r5, 16(sp)
-    .endif
-    .if \r6 != 0
-    CHECK_STACK_OFFSET 6, \stack_offset
-    lw              \r6, 20(sp)
-    .endif
-    .if \r7 != 0
-    CHECK_STACK_OFFSET 7, \stack_offset
-    lw              \r7, 24(sp)
-    .endif
-    .if \r8 != 0
-    CHECK_STACK_OFFSET 8, \stack_offset
-    lw              \r8, 28(sp)
-    .endif
-    .if \r9 != 0
-    CHECK_STACK_OFFSET 9, \stack_offset
-    lw              \r9, 32(sp)
-    .endif
-    .if \r10 != 0
-    CHECK_STACK_OFFSET 10, \stack_offset
-    lw              \r10, 36(sp)
-    .endif
-    .if \r11 != 0
-    CHECK_STACK_OFFSET 11, \stack_offset
-    lw              \r11, 40(sp)
-    .endif
-    .if \r12 != 0
-    CHECK_STACK_OFFSET 12, \stack_offset
-    lw              \r12, 44(sp)
-    .endif
-    .if \r13 != 0
-    CHECK_STACK_OFFSET 13, \stack_offset
-    lw              \r13, 48(sp)
-    .endif
-    .if \r14 != 0
-    CHECK_STACK_OFFSET 14, \stack_offset
-    lw              \r14, 52(sp)
-    .endif
-    .if \stack_offset != 0
-    addiu           sp, sp, \stack_offset
-    .endif
-.endm
-
-/*
- * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
- * returned in (out_8888) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x0565_TO_1x8888 in_565,   \
-                                out_8888, \
-                                scratch1, scratch2
-    lui     \out_8888, 0xff00
-    sll     \scratch1, \in_565,   0x3
-    andi    \scratch2, \scratch1, 0xff
-    ext     \scratch1, \in_565,   0x2, 0x3
-    or      \scratch1, \scratch2, \scratch1
-    or      \out_8888, \out_8888, \scratch1
-
-    sll     \scratch1, \in_565,   0x5
-    andi    \scratch1, \scratch1, 0xfc00
-    srl     \scratch2, \in_565,   0x1
-    andi    \scratch2, \scratch2, 0x300
-    or      \scratch2, \scratch1, \scratch2
-    or      \out_8888, \out_8888, \scratch2
-
-    andi    \scratch1, \in_565,   0xf800
-    srl     \scratch2, \scratch1, 0x5
-    andi    \scratch2, \scratch2, 0xff00
-    or      \scratch1, \scratch1, \scratch2
-    sll     \scratch1, \scratch1, 0x8
-    or      \out_8888, \out_8888, \scratch1
-.endm
-
-/*
- * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
- * returned in (out1_8888 and out2_8888) registers. Requires four scratch
- * registers (scratch1 ... scratch4). It also requires maskG and maskB for
- * color component extractions. These masks must have following values:
- *   li       maskG, 0x07e007e0
- *   li       maskB, 0x001F001F
- */
-.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565,     \
-                                out1_8888, out2_8888, \
-                                maskG, maskB,         \
-                                scratch1, scratch2, scratch3, scratch4
-    sll               \scratch1,  \in1_565,   16
-    or                \scratch1,  \scratch1,  \in2_565
-    lui               \out2_8888, 0xff00
-    ori               \out2_8888, \out2_8888, 0xff00
-    shrl.ph           \scratch2,  \scratch1,  11
-    and               \scratch3,  \scratch1,  \maskG
-    shra.ph           \scratch4,  \scratch2,  2
-    shll.ph           \scratch2,  \scratch2,  3
-    shll.ph           \scratch3,  \scratch3,  5
-    or                \scratch2,  \scratch2,  \scratch4
-    shrl.qb           \scratch4,  \scratch3,  6
-    or                \out2_8888, \out2_8888, \scratch2
-    or                \scratch3,  \scratch3,  \scratch4
-    and               \scratch1,  \scratch1,  \maskB
-    shll.ph           \scratch2,  \scratch1,  3
-    shra.ph           \scratch4,  \scratch1,  2
-    or                \scratch2,  \scratch2,  \scratch4
-    or                \scratch3,  \scratch2,  \scratch3
-    precrq.ph.w       \out1_8888, \out2_8888, \scratch3
-    precr_sra.ph.w    \out2_8888, \scratch3,  0
-.endm
-
-/*
- * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
- * returned in (out_565) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x8888_TO_1x0565 in_8888, \
-                                out_565, \
-                                scratch1, scratch2
-    ext     \out_565,  \in_8888,  0x3, 0x5
-    srl     \scratch1, \in_8888,  0x5
-    andi    \scratch1, \scratch1, 0x07e0
-    srl     \scratch2, \in_8888,  0x8
-    andi    \scratch2, \scratch2, 0xf800
-    or      \out_565,  \out_565,  \scratch1
-    or      \out_565,  \out_565,  \scratch2
-.endm
-
-/*
- * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
- * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
- * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
- * for color component extractions. These masks must have following values:
- *   li       maskR, 0xf800f800
- *   li       maskG, 0x07e007e0
- *   li       maskB, 0x001F001F
- * Value of input register in2_8888 is lost.
- */
-.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888,  \
-                                out1_565, out2_565,  \
-                                maskR, maskG, maskB, \
-                                scratch1, scratch2
-    precr.qb.ph    \scratch1, \in2_8888, \in1_8888
-    precrq.qb.ph   \in2_8888, \in2_8888, \in1_8888
-    and            \out1_565, \scratch1, \maskR
-    shrl.ph        \scratch1, \scratch1, 3
-    shll.ph        \in2_8888, \in2_8888, 3
-    and            \scratch1, \scratch1, \maskB
-    or             \out1_565, \out1_565, \scratch1
-    and            \in2_8888, \in2_8888, \maskG
-    or             \out1_565, \out1_565, \in2_8888
-    srl            \out2_565, \out1_565, 16
-.endm
-
-/*
- * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
- * for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro MIPS_UN8x4_MUL_UN8 s_8888,  \
-                          m_8,     \
-                          d_8888,  \
-                          maskLSR, \
-                          scratch1, scratch2, scratch3
-    replv.ph          \m_8,      \m_8                 /*   0 | M | 0 | M */
-    muleu_s.ph.qbl    \scratch1, \s_8888,   \m_8      /*    A*M  |  R*M */
-    muleu_s.ph.qbr    \scratch2, \s_8888,   \m_8      /*    G*M  |  B*M */
-    shra_r.ph         \scratch3, \scratch1, 8
-    shra_r.ph         \d_8888,   \scratch2, 8
-    and               \scratch3, \scratch3, \maskLSR  /*   0 |A*M| 0 |R*M */
-    and               \d_8888,   \d_8888,   \maskLSR  /*   0 |G*M| 0 |B*M */
-    addq.ph           \scratch1, \scratch1, \scratch3 /* A*M+A*M | R*M+R*M */
-    addq.ph           \scratch2, \scratch2, \d_8888   /* G*M+G*M | B*M+B*M */
-    shra_r.ph         \scratch1, \scratch1, 8
-    shra_r.ph         \scratch2, \scratch2, 8
-    precr.qb.ph       \d_8888,   \scratch1, \scratch2
-.endm
-
-/*
- * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
- * needed for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
-                              s2_8888, \
-                              m1_8,    \
-                              m2_8,    \
-                              d1_8888, \
-                              d2_8888, \
-                              maskLSR, \
-                              scratch1, scratch2, scratch3, \
-                              scratch4, scratch5, scratch6
-    replv.ph          \m1_8,     \m1_8                /*  0 | M1 | 0 | M1 */
-    replv.ph          \m2_8,     \m2_8                /*  0 | M2 | 0 | M2 */
-    muleu_s.ph.qbl    \scratch1, \s1_8888,  \m1_8     /*  A1*M1  |  R1*M1 */
-    muleu_s.ph.qbr    \scratch2, \s1_8888,  \m1_8     /*  G1*M1  |  B1*M1 */
-    muleu_s.ph.qbl    \scratch3, \s2_8888,  \m2_8     /*  A2*M2  |  R2*M2 */
-    muleu_s.ph.qbr    \scratch4, \s2_8888,  \m2_8     /*  G2*M2  |  B2*M2 */
-    shra_r.ph         \scratch5, \scratch1, 8
-    shra_r.ph         \d1_8888,  \scratch2, 8
-    shra_r.ph         \scratch6, \scratch3, 8
-    shra_r.ph         \d2_8888,  \scratch4, 8
-    and               \scratch5, \scratch5, \maskLSR  /* 0 |A1*M1| 0 |R1*M1 */
-    and               \d1_8888,  \d1_8888,  \maskLSR  /* 0 |G1*M1| 0 |B1*M1 */
-    and               \scratch6, \scratch6, \maskLSR  /* 0 |A2*M2| 0 |R2*M2 */
-    and               \d2_8888,  \d2_8888,  \maskLSR  /* 0 |G2*M2| 0 |B2*M2 */
-    addq.ph           \scratch1, \scratch1, \scratch5
-    addq.ph           \scratch2, \scratch2, \d1_8888
-    addq.ph           \scratch3, \scratch3, \scratch6
-    addq.ph           \scratch4, \scratch4, \d2_8888
-    shra_r.ph         \scratch1, \scratch1, 8
-    shra_r.ph         \scratch2, \scratch2, 8
-    shra_r.ph         \scratch3, \scratch3, 8
-    shra_r.ph         \scratch4, \scratch4, 8
-    precr.qb.ph       \d1_8888,  \scratch1, \scratch2
-    precr.qb.ph       \d2_8888,  \scratch3, \scratch4
-.endm
-
-/*
- * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
- * needed for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro MIPS_UN8x4_MUL_UN8x4 s_8888,  \
-                            m_8888,  \
-                            d_8888,  \
-                            maskLSR, \
-                            scratch1, scratch2, scratch3, scratch4
-    preceu.ph.qbl     \scratch1, \m_8888              /*   0 | A | 0 | R */
-    preceu.ph.qbr     \scratch2, \m_8888              /*   0 | G | 0 | B */
-    muleu_s.ph.qbl    \scratch3, \s_8888,   \scratch1 /*    A*A  |  R*R */
-    muleu_s.ph.qbr    \scratch4, \s_8888,   \scratch2 /*    G*G  |  B*B */
-    shra_r.ph         \scratch1, \scratch3, 8
-    shra_r.ph         \scratch2, \scratch4, 8
-    and               \scratch1, \scratch1, \maskLSR  /*   0 |A*A| 0 |R*R */
-    and               \scratch2, \scratch2, \maskLSR  /*   0 |G*G| 0 |B*B */
-    addq.ph           \scratch1, \scratch1, \scratch3
-    addq.ph           \scratch2, \scratch2, \scratch4
-    shra_r.ph         \scratch1, \scratch1, 8
-    shra_r.ph         \scratch2, \scratch2, 8
-    precr.qb.ph       \d_8888,   \scratch1, \scratch2
-.endm
-
-/*
- * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
- * maskLSR needed for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-
-.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888,  \
-                                s2_8888,  \
-                                m1_8888,  \
-                                m2_8888,  \
-                                d1_8888,  \
-                                d2_8888,  \
-                                maskLSR,  \
-                                scratch1, scratch2, scratch3, \
-                                scratch4, scratch5, scratch6
-    preceu.ph.qbl     \scratch1, \m1_8888             /*   0 | A | 0 | R */
-    preceu.ph.qbr     \scratch2, \m1_8888             /*   0 | G | 0 | B */
-    preceu.ph.qbl     \scratch3, \m2_8888             /*   0 | A | 0 | R */
-    preceu.ph.qbr     \scratch4, \m2_8888             /*   0 | G | 0 | B */
-    muleu_s.ph.qbl    \scratch5, \s1_8888,  \scratch1 /*    A*A  |  R*R */
-    muleu_s.ph.qbr    \scratch6, \s1_8888,  \scratch2 /*    G*G  |  B*B */
-    muleu_s.ph.qbl    \scratch1, \s2_8888,  \scratch3 /*    A*A  |  R*R */
-    muleu_s.ph.qbr    \scratch2, \s2_8888,  \scratch4 /*    G*G  |  B*B */
-    shra_r.ph         \scratch3, \scratch5, 8
-    shra_r.ph         \scratch4, \scratch6, 8
-    shra_r.ph         \d1_8888,  \scratch1, 8
-    shra_r.ph         \d2_8888,  \scratch2, 8
-    and               \scratch3, \scratch3, \maskLSR  /*   0 |A*A| 0 |R*R */
-    and               \scratch4, \scratch4, \maskLSR  /*   0 |G*G| 0 |B*B */
-    and               \d1_8888,  \d1_8888,  \maskLSR  /*   0 |A*A| 0 |R*R */
-    and               \d2_8888,  \d2_8888,  \maskLSR  /*   0 |G*G| 0 |B*B */
-    addq.ph           \scratch3, \scratch3, \scratch5
-    addq.ph           \scratch4, \scratch4, \scratch6
-    addq.ph           \d1_8888,  \d1_8888,  \scratch1
-    addq.ph           \d2_8888,  \d2_8888,  \scratch2
-    shra_r.ph         \scratch3, \scratch3, 8
-    shra_r.ph         \scratch4, \scratch4, 8
-    shra_r.ph         \scratch5, \d1_8888,  8
-    shra_r.ph         \scratch6, \d2_8888,  8
-    precr.qb.ph       \d1_8888,  \scratch3, \scratch4
-    precr.qb.ph       \d2_8888,  \scratch5, \scratch6
-.endm
-
-/*
- * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
- * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
- * needed for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro OVER_8888_8_8888 s_8888,   \
-                        m_8,      \
-                        d_8888,   \
-                        out_8888, \
-                        maskLSR,  \
-                        scratch1, scratch2, scratch3, scratch4
-    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
-                       \scratch1, \maskLSR, \
-                       \scratch2, \scratch3, \scratch4
-
-    not                \scratch2, \scratch1
-    srl                \scratch2, \scratch2, 24
-
-    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
-                       \d_8888,   \maskLSR,  \
-                       \scratch3, \scratch4, \out_8888
-
-    addu_s.qb          \out_8888, \d_8888,   \scratch1
-.endm
-
-/*
- * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
- * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
- * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
- * have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
-                              s2_8888,   \
-                              m1_8,      \
-                              m2_8,      \
-                              d1_8888,   \
-                              d2_8888,   \
-                              out1_8888, \
-                              out2_8888, \
-                              maskLSR,   \
-                              scratch1, scratch2, scratch3, \
-                              scratch4, scratch5, scratch6
-    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
-                           \m1_8,      \m2_8, \
-                           \scratch1,  \scratch2, \
-                           \maskLSR, \
-                           \scratch3,  \scratch4, \out1_8888, \
-                           \out2_8888, \scratch5, \scratch6
-
-    not                    \scratch3,  \scratch1
-    srl                    \scratch3,  \scratch3, 24
-    not                    \scratch4,  \scratch2
-    srl                    \scratch4,  \scratch4, 24
-
-    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
-                           \scratch3,  \scratch4, \
-                           \d1_8888,   \d2_8888, \
-                           \maskLSR, \
-                           \scratch5,  \scratch6, \out1_8888, \
-                           \out2_8888, \scratch3, \scratch4
-
-    addu_s.qb              \out1_8888, \d1_8888,  \scratch1
-    addu_s.qb              \out2_8888, \d2_8888,  \scratch2
-.endm
-
-/*
- * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
- * destination pixel (d_8888). It also requires maskLSR needed for rounding
- * process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro OVER_8888_8888 s_8888,   \
-                      d_8888,   \
-                      out_8888, \
-                      maskLSR,  \
-                      scratch1, scratch2, scratch3, scratch4
-    not                \scratch1, \s_8888
-    srl                \scratch1, \scratch1, 24
-
-    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
-                       \out_8888, \maskLSR, \
-                       \scratch2, \scratch3, \scratch4
-
-    addu_s.qb          \out_8888, \out_8888, \s_8888
-.endm
-
-/*
- * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
- * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
- * needed for rounding process. maskLSR must have following value:
- *   li       maskLSR, 0x00ff00ff
- */
-.macro OVER_2x8888_2x8888 s1_8888,   \
-                          s2_8888,   \
-                          d1_8888,   \
-                          d2_8888,   \
-                          out1_8888, \
-                          out2_8888, \
-                          maskLSR,   \
-                          scratch1, scratch2, scratch3, \
-                          scratch4, scratch5, scratch6
-    not                    \scratch1,  \s1_8888
-    srl                    \scratch1,  \scratch1,  24
-    not                    \scratch2,  \s2_8888
-    srl                    \scratch2,  \scratch2,  24
-    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
-                           \scratch1,  \scratch2,  \
-                           \out1_8888, \out2_8888, \
-                           \maskLSR, \
-                           \scratch3,  \scratch4, \scratch5, \
-                           \scratch6,  \d1_8888,  \d2_8888
-
-    addu_s.qb              \out1_8888, \out1_8888, \s1_8888
-    addu_s.qb              \out2_8888, \out2_8888, \s2_8888
-.endm
-
-.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
-                                    m_8,      \
-                                    d_8888,   \
-                                    out_8888, \
-                                    maskLSR,  \
-                                    scratch1, scratch2, scratch3
-    MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
-                       \out_8888, \maskLSR, \
-                       \scratch1, \scratch2, \scratch3
-
-    addu_s.qb          \out_8888, \out_8888, \d_8888
-.endm
-
-.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
-                             s2_8888,   \
-                             m1_8,      \
-                             m2_8,      \
-                             d1_8888,   \
-                             d2_8888,   \
-                             out1_8888, \
-                             out2_8888, \
-                             maskLSR,   \
-                             scratch1,  scratch2, scratch3, \
-                             scratch4, scratch5, scratch6
-    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
-                           \m1_8,      \m2_8, \
-                           \out1_8888, \out2_8888, \
-                           \maskLSR, \
-                           \scratch1,  \scratch2, \scratch3, \
-                           \scratch4,  \scratch5, \scratch6
-
-    addu_s.qb             \out1_8888, \out1_8888, \d1_8888
-    addu_s.qb             \out2_8888, \out2_8888, \d2_8888
-.endm
-
-.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
-                                         scratch1, scratch2,     \
-                                         alpha, red, green, blue \
-                                         wt1, wt2, wb1, wb2
-    andi            \scratch1, \tl,  0xff
-    andi            \scratch2, \tr,  0xff
-    andi            \alpha,    \bl,  0xff
-    andi            \red,      \br,  0xff
-
-    multu           $ac0,      \wt1, \scratch1
-    maddu           $ac0,      \wt2, \scratch2
-    maddu           $ac0,      \wb1, \alpha
-    maddu           $ac0,      \wb2, \red
-
-    ext             \scratch1, \tl,  8, 8
-    ext             \scratch2, \tr,  8, 8
-    ext             \alpha,    \bl,  8, 8
-    ext             \red,      \br,  8, 8
-
-    multu           $ac1,      \wt1, \scratch1
-    maddu           $ac1,      \wt2, \scratch2
-    maddu           $ac1,      \wb1, \alpha
-    maddu           $ac1,      \wb2, \red
-
-    ext             \scratch1, \tl,  16, 8
-    ext             \scratch2, \tr,  16, 8
-    ext             \alpha,    \bl,  16, 8
-    ext             \red,      \br,  16, 8
-
-    mflo            \blue,     $ac0
-
-    multu           $ac2,      \wt1, \scratch1
-    maddu           $ac2,      \wt2, \scratch2
-    maddu           $ac2,      \wb1, \alpha
-    maddu           $ac2,      \wb2, \red
-
-    ext             \scratch1, \tl,  24, 8
-    ext             \scratch2, \tr,  24, 8
-    ext             \alpha,    \bl,  24, 8
-    ext             \red,      \br,  24, 8
-
-    mflo            \green,    $ac1
-
-    multu           $ac3,      \wt1, \scratch1
-    maddu           $ac3,      \wt2, \scratch2
-    maddu           $ac3,      \wb1, \alpha
-    maddu           $ac3,      \wb2, \red
-
-    mflo            \red,      $ac2
-    mflo            \alpha,    $ac3
-
-    precr.qb.ph     \alpha,    \alpha, \red
-    precr.qb.ph     \scratch1, \green, \blue
-    precrq.qb.ph    \tl,       \alpha, \scratch1
-.endm
-
-#endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/vendor/pixman/pixman/pixman-mips-dspr2.c b/vendor/pixman/pixman/pixman-mips-dspr2.c
deleted file mode 100644
index c43eb1e89..000000000
--- a/vendor/pixman/pixman/pixman-mips-dspr2.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-#include "pixman-mips-dspr2.h"
-
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565,
-                                    uint32_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888,
-                                    uint16_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565,
-                                    uint16_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
-                                    uint8_t, 3, uint8_t, 3)
-#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
-                                    uint8_t, 3, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
-                                    uint8_t, 3, uint16_t, 1)
-#endif
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
-                                    uint32_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
-                                    uint8_t, 1, uint8_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565,
-                                    uint8_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888,
-                                    uint8_t,  1, uint32_t, 1)
-
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
-                                       uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
-                                       uint32_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
-                                       uint8_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
-
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
-                                      uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
-                                      uint32_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
-                                      uint16_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
-                                      uint32_t, 1, uint32_t, 1)
-
-PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
-                                  uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
-                                  uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888,
-                                  uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8,
-                                  uint8_t, 1)
-
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
-                                         uint8_t,  1, uint8_t,  1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
-                                         uint8_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1,
-                                         uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1,
-                                         uint8_t,  1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
-                                         uint8_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
-                                         uint8_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
-                                         uint8_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
-                                         uint32_t, 1, uint32_t, 1)
-
-PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
-                                         uint32_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
-                                         uint32_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
-                                         uint16_t, uint32_t)
-
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
-                                          uint32_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
-                                          uint32_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
-                                          uint16_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
-                                          uint16_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
-                                          uint32_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
-                                          uint32_t, uint32_t)
-
-PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565,
-                                            OVER, uint32_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565,
-                                            OVER, uint16_t, uint16_t)
-
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
-                                             uint32_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
-                                             uint32_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC,
-                                             uint16_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC,
-                                             uint16_t, uint16_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
-                                             uint32_t, uint32_t)
-PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD,
-                                             uint32_t, uint32_t)
-
-static pixman_bool_t
-mips_dspr2_fill (pixman_implementation_t *imp,
-                 uint32_t *               bits,
-                 int                      stride,
-                 int                      bpp,
-                 int                      x,
-                 int                      y,
-                 int                      width,
-                 int                      height,
-                 uint32_t                 _xor)
-{
-    uint8_t *byte_line;
-    uint32_t byte_width;
-    switch (bpp)
-    {
-    case 16:
-        stride = stride * (int) sizeof (uint32_t) / 2;
-        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-        byte_width = width * 2;
-        stride *= 2;
-
-        while (height--)
-        {
-            uint8_t *dst = byte_line;
-            byte_line += stride;
-            pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
-        }
-        return TRUE;
-    case 32:
-        stride = stride * (int) sizeof (uint32_t) / 4;
-        byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-        byte_width = width * 4;
-        stride *= 4;
-
-        while (height--)
-        {
-            uint8_t *dst = byte_line;
-            byte_line += stride;
-            pixman_fill_buff32_mips (dst, byte_width, _xor);
-        }
-        return TRUE;
-    default:
-        return FALSE;
-    }
-}
-
-static pixman_bool_t
-mips_dspr2_blt (pixman_implementation_t *imp,
-                uint32_t *               src_bits,
-                uint32_t *               dst_bits,
-                int                      src_stride,
-                int                      dst_stride,
-                int                      src_bpp,
-                int                      dst_bpp,
-                int                      src_x,
-                int                      src_y,
-                int                      dest_x,
-                int                      dest_y,
-                int                      width,
-                int                      height)
-{
-    if (src_bpp != dst_bpp)
-        return FALSE;
-
-    uint8_t *src_bytes;
-    uint8_t *dst_bytes;
-    uint32_t byte_width;
-
-    switch (src_bpp)
-    {
-    case 16:
-        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-        src_bytes =(uint8_t *)(((uint16_t *)src_bits)
-                                          + src_stride * (src_y) + (src_x));
-        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
-                                           + dst_stride * (dest_y) + (dest_x));
-        byte_width = width * 2;
-        src_stride *= 2;
-        dst_stride *= 2;
-
-        while (height--)
-        {
-            uint8_t *src = src_bytes;
-            uint8_t *dst = dst_bytes;
-            src_bytes += src_stride;
-            dst_bytes += dst_stride;
-            pixman_mips_fast_memcpy (dst, src, byte_width);
-        }
-        return TRUE;
-    case 32:
-        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-        src_bytes = (uint8_t *)(((uint32_t *)src_bits)
-                                           + src_stride * (src_y) + (src_x));
-        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
-                                           + dst_stride * (dest_y) + (dest_x));
-        byte_width = width * 4;
-        src_stride *= 4;
-        dst_stride *= 4;
-
-        while (height--)
-        {
-            uint8_t *src = src_bytes;
-            uint8_t *dst = dst_bytes;
-            src_bytes += src_stride;
-            dst_bytes += dst_stride;
-            pixman_mips_fast_memcpy (dst, src, byte_width);
-        }
-        return TRUE;
-    default:
-        return FALSE;
-    }
-}
-
-static const pixman_fast_path_t mips_dspr2_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, b5g6r5,   mips_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5,   mips_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5,   mips_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5,   mips_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5,   mips_composite_src_8888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, a8r8g8b8, mips_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, x8r8g8b8, mips_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, a8b8g8r8, mips_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, x8b8g8r8, mips_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
-#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, x8r8g8b8, mips_composite_src_0888_8888_rev),
-    PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, r5g6b5,   mips_composite_src_0888_0565_rev),
-#endif
-    PIXMAN_STD_FAST_PATH (SRC, pixbuf,   pixbuf,  a8r8g8b8, mips_composite_src_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC, pixbuf,   pixbuf,  a8b8g8r8, mips_composite_src_rpixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC, rpixbuf,  rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC, rpixbuf,  rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8r8g8b8, mips_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8r8g8b8, mips_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8b8g8r8, mips_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8b8g8r8, mips_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8,       mips_composite_src_n_8_8),
-
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mips_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   mips_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   mips_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       mips_composite_over_n_8_8),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, mips_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, mips_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, mips_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   mips_composite_over_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, mips_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, mips_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, mips_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, mips_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   mips_composite_over_8888_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   mips_composite_over_8888_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   mips_composite_over_0565_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   mips_composite_over_0565_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, mips_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, mips_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, mips_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, mips_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   mips_composite_over_8888_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   mips_composite_over_8888_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   mips_composite_over_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   mips_composite_over_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_over_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, mips_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, mips_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, mips_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, mips_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   mips_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   mips_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       mips_composite_add_n_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, mips_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, mips_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       mips_composite_add_8_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   mips_composite_add_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   mips_composite_add_0565_8_0565),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, mips_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, mips_composite_add_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_add_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, mips_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, mips_composite_add_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       mips_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, mips_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, mips_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, r5g6b5,   mips_composite_out_reverse_8_0565),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, b5g6r5,   mips_composite_out_reverse_8_0565),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, a8r8g8b8, mips_composite_out_reverse_8_8888),
-    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, a8b8g8r8, mips_composite_out_reverse_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mips_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (IN,           solid, null, a8,       mips_composite_in_n_8),
-
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888),
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888),
-
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
-
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
-    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
-    /* Note: NONE repeat is not supported yet */
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
-    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
-
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
-
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565),
-
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
-    { PIXMAN_OP_NONE },
-};
-
-static void
-mips_dspr2_combine_over_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    if (mask)
-        pixman_composite_over_8888_8888_8888_asm_mips (
-            dest, (uint32_t *)src, (uint32_t *)mask, width);
-    else
-        pixman_composite_over_8888_8888_asm_mips (
-		    dest, (uint32_t *)src, width);
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp =
-        _pixman_implementation_create (fallback, mips_dspr2_fast_paths);
-
-    imp->combine_32[PIXMAN_OP_OVER] = mips_dspr2_combine_over_u;
-
-    imp->blt = mips_dspr2_blt;
-    imp->fill = mips_dspr2_fill;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-mips-dspr2.h b/vendor/pixman/pixman/pixman-mips-dspr2.h
deleted file mode 100644
index 57b38359e..000000000
--- a/vendor/pixman/pixman/pixman-mips-dspr2.h
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
- */
-
-#ifndef PIXMAN_MIPS_DSPR2_H
-#define PIXMAN_MIPS_DSPR2_H
-
-#include "pixman-private.h"
-#include "pixman-inlines.h"
-
-#define SKIP_ZERO_SRC  1
-#define SKIP_ZERO_MASK 2
-#define DO_FAST_MEMCPY 3
-
-void
-pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
-void
-pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
-void
-pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name,          \
-                                           src_type, src_cnt,    \
-                                           dst_type, dst_cnt)    \
-void                                                             \
-pixman_composite_##name##_asm_mips (dst_type *dst,               \
-                                    src_type *src,               \
-                                    int32_t   w);                \
-                                                                 \
-static void                                                      \
-mips_composite_##name (pixman_implementation_t *imp,             \
-                       pixman_composite_info_t *info)            \
-{                                                                \
-    PIXMAN_COMPOSITE_ARGS (info);                                \
-    dst_type *dst_line, *dst;                                    \
-    src_type *src_line, *src;                                    \
-    int32_t dst_stride, src_stride;                              \
-    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;   \
-                                                                 \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,    \
-                           src_stride, src_line, src_cnt);       \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
-                           dst_stride, dst_line, dst_cnt);       \
-                                                                 \
-    while (height--)                                             \
-    {                                                            \
-      dst = dst_line;                                            \
-      dst_line += dst_stride;                                    \
-      src = src_line;                                            \
-      src_line += src_stride;                                    \
-                                                                 \
-      if (flags == DO_FAST_MEMCPY)                               \
-        pixman_mips_fast_memcpy (dst, src, width * bpp);         \
-      else                                                       \
-        pixman_composite_##name##_asm_mips (dst, src, width);    \
-    }                                                            \
-}
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
-                                         dst_type, dst_cnt)      \
-void                                                             \
-pixman_composite_##name##_asm_mips (dst_type *dst,               \
-                                    uint32_t  src,               \
-                                    int32_t   w);                \
-                                                                 \
-static void                                                      \
-mips_composite_##name (pixman_implementation_t *imp,             \
-                       pixman_composite_info_t *info)            \
-{                                                                \
-    PIXMAN_COMPOSITE_ARGS (info);                                \
-    dst_type  *dst_line, *dst;                                   \
-    int32_t    dst_stride;                                       \
-    uint32_t   src;                                              \
-                                                                 \
-    src = _pixman_image_get_solid (                              \
-    imp, src_image, dest_image->bits.format);                    \
-                                                                 \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                     \
-        return;                                                  \
-                                                                 \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
-                           dst_stride, dst_line, dst_cnt);       \
-                                                                 \
-    while (height--)                                             \
-    {                                                            \
-        dst = dst_line;                                          \
-        dst_line += dst_stride;                                  \
-                                                                 \
-        pixman_composite_##name##_asm_mips (dst, src, width);    \
-    }                                                            \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,          \
-                                              mask_type, mask_cnt,  \
-                                              dst_type, dst_cnt)    \
-void                                                                \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                 \
-                                    uint32_t  src,                  \
-                                    mask_type *mask,                \
-                                    int32_t   w);                   \
-                                                                    \
-static void                                                         \
-mips_composite_##name (pixman_implementation_t *imp,                \
-                       pixman_composite_info_t *info)               \
-{                                                                   \
-    PIXMAN_COMPOSITE_ARGS (info);                                   \
-    dst_type  *dst_line, *dst;                                      \
-    mask_type *mask_line, *mask;                                    \
-    int32_t    dst_stride, mask_stride;                             \
-    uint32_t   src;                                                 \
-                                                                    \
-    src = _pixman_image_get_solid (                                 \
-        imp, src_image, dest_image->bits.format);                   \
-                                                                    \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                        \
-        return;                                                     \
-                                                                    \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,    \
-                           dst_stride, dst_line, dst_cnt);          \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,   \
-                           mask_stride, mask_line, mask_cnt);       \
-                                                                    \
-    while (height--)                                                \
-    {                                                               \
-        dst = dst_line;                                             \
-        dst_line += dst_stride;                                     \
-        mask = mask_line;                                           \
-        mask_line += mask_stride;                                   \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width); \
-    }                                                               \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name,           \
-                                            src_type, src_cnt,      \
-                                            dst_type, dst_cnt)      \
-void                                                                \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                 \
-                                    src_type  *src,                 \
-                                    uint32_t   mask,                \
-                                    int32_t    w);                  \
-                                                                    \
-static void                                                         \
-mips_composite_##name (pixman_implementation_t *imp,                \
-                       pixman_composite_info_t *info)               \
-{                                                                   \
-    PIXMAN_COMPOSITE_ARGS (info);                                   \
-    dst_type  *dst_line, *dst;                                      \
-    src_type  *src_line, *src;                                      \
-    int32_t    dst_stride, src_stride;                              \
-    uint32_t   mask;                                                \
-                                                                    \
-    mask = _pixman_image_get_solid (                                \
-        imp, mask_image, dest_image->bits.format);                  \
-                                                                    \
-    if ((flags & SKIP_ZERO_MASK) && mask == 0)                      \
-        return;                                                     \
-                                                                    \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,    \
-                           dst_stride, dst_line, dst_cnt);          \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,       \
-                           src_stride, src_line, src_cnt);          \
-                                                                    \
-    while (height--)                                                \
-    {                                                               \
-        dst = dst_line;                                             \
-        dst_line += dst_stride;                                     \
-        src = src_line;                                             \
-        src_line += src_stride;                                     \
-                                                                    \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width); \
-    }                                                               \
-}
-
-/************************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
-                                                mask_type, mask_cnt,     \
-                                                dst_type, dst_cnt)       \
-void                                                                     \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                      \
-                                    src_type  *src,                      \
-                                    mask_type *mask,                     \
-                                    int32_t   w);                        \
-                                                                         \
-static void                                                              \
-mips_composite_##name (pixman_implementation_t *imp,                     \
-                       pixman_composite_info_t *info)                    \
-{                                                                        \
-    PIXMAN_COMPOSITE_ARGS (info);                                        \
-    dst_type  *dst_line, *dst;                                           \
-    src_type  *src_line, *src;                                           \
-    mask_type *mask_line, *mask;                                         \
-    int32_t    dst_stride, src_stride, mask_stride;                      \
-                                                                         \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);               \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,            \
-                           src_stride, src_line, src_cnt);               \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,        \
-                           mask_stride, mask_line, mask_cnt);            \
-                                                                         \
-    while (height--)                                                     \
-    {                                                                    \
-        dst = dst_line;                                                  \
-        dst_line += dst_stride;                                          \
-        mask = mask_line;                                                \
-        mask_line += mask_stride;                                        \
-        src = src_line;                                                  \
-        src_line += src_stride;                                          \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width);      \
-    }                                                                    \
-}
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op,                    \
-                                                src_type, dst_type)          \
-void                                                                         \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                    \
-                                                   dst_type *       dst,     \
-                                                   const src_type * src,     \
-                                                   int32_t          w,       \
-                                                   pixman_fixed_t   vx,      \
-                                                   pixman_fixed_t   unit_x); \
-                                                                             \
-static force_inline void                                                     \
-scaled_nearest_scanline_mips_##name##_##op (dst_type *       pd,             \
-                                            const src_type * ps,             \
-                                            int32_t          w,              \
-                                            pixman_fixed_t   vx,             \
-                                            pixman_fixed_t   unit_x,         \
-                                            pixman_fixed_t   max_vx,         \
-                                            pixman_bool_t    zero_src)       \
-{                                                                            \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w,      \
-                                                             vx, unit_x);    \
-}                                                                            \
-                                                                             \
-FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op,                             \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, COVER)                            \
-FAST_NEAREST_MAINLOOP (mips_##name##_none_##op,                              \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, NONE)                             \
-FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op,                               \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                    \
-    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                            \
-    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                             \
-    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op,           \
-                                                  src_type, dst_type)         \
-void                                                                          \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                     \
-                                                   dst_type *       dst,      \
-                                                   const src_type * src,      \
-                                                   const uint8_t *  mask,     \
-                                                   int32_t          w,        \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x);  \
-                                                                              \
-static force_inline void                                                      \
-scaled_nearest_scanline_mips_##name##_##op (const uint8_t *  mask,            \
-                                            dst_type *       pd,              \
-                                            const src_type * ps,              \
-                                            int32_t          w,               \
-                                            pixman_fixed_t   vx,              \
-                                            pixman_fixed_t   unit_x,          \
-                                            pixman_fixed_t   max_vx,          \
-                                            pixman_bool_t    zero_src)        \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-        return;                                                               \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps,          \
-                                                             mask, w,         \
-                                                             vx, unit_x);     \
-}                                                                             \
-                                                                              \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op,                       \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op,                        \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
-                                                 src_type, dst_type)         \
-void                                                                         \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips(                    \
-                                             dst_type *       dst,           \
-                                             const src_type * src_top,       \
-                                             const src_type * src_bottom,    \
-                                             int32_t          w,             \
-                                             int              wt,            \
-                                             int              wb,            \
-                                             pixman_fixed_t   vx,            \
-                                             pixman_fixed_t   unit_x);       \
-static force_inline void                                                     \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,           \
-                                             const uint32_t * mask,          \
-                                             const src_type * src_top,       \
-                                             const src_type * src_bottom,    \
-                                             int32_t          w,             \
-                                             int              wt,            \
-                                             int              wb,            \
-                                             pixman_fixed_t   vx,            \
-                                             pixman_fixed_t   unit_x,        \
-                                             pixman_fixed_t   max_vx,        \
-                                             pixman_bool_t    zero_src)      \
-{                                                                            \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                 \
-        return;                                                              \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top,  \
-                                                              src_bottom, w, \
-                                                              wt, wb,        \
-                                                              vx, unit_x);   \
-}                                                                            \
-                                                                             \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                     \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)       \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                      \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)        \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                       \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)         \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                    \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, NORMAL,                 \
-                       FLAG_NONE)
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
-                                                src_type, dst_type)           \
-void                                                                          \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                    \
-                                             dst_type *       dst,            \
-                                             const uint8_t *  mask,           \
-                                             const src_type * top,            \
-                                             const src_type * bottom,         \
-                                             int              wt,             \
-                                             int              wb,             \
-                                             pixman_fixed_t   x,              \
-                                             pixman_fixed_t   ux,             \
-                                             int              width);         \
-                                                                              \
-static force_inline void                                                      \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,            \
-                                             const uint8_t *  mask,           \
-                                             const src_type * src_top,        \
-                                             const src_type * src_bottom,     \
-                                             int32_t          w,              \
-                                             int              wt,             \
-                                             int              wb,             \
-                                             pixman_fixed_t   vx,             \
-                                             pixman_fixed_t   unit_x,         \
-                                             pixman_fixed_t   max_vx,         \
-                                             pixman_bool_t    zero_src)       \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-        return;                                                               \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                \
-                      dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-}                                                                             \
-                                                                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                      \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, COVER,                    \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                       \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, NONE,                     \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                        \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, PAD,                      \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                     \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, NORMAL,                   \
-                       FLAG_HAVE_NON_SOLID_MASK)
-
-#endif //PIXMAN_MIPS_DSPR2_H
diff --git a/vendor/pixman/pixman/pixman-mips-memcpy-asm.S b/vendor/pixman/pixman/pixman-mips-memcpy-asm.S
deleted file mode 100644
index 9ad6da537..000000000
--- a/vendor/pixman/pixman/pixman-mips-memcpy-asm.S
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "pixman-mips-dspr2-asm.h"
-
-/*
- * This routine could be optimized for MIPS64. The current code only
- * uses MIPS32 instructions.
- */
-
-#ifdef EB
-#  define LWHI	lwl		/* high part is left in big-endian */
-#  define SWHI	swl		/* high part is left in big-endian */
-#  define LWLO	lwr		/* low part is right in big-endian */
-#  define SWLO	swr		/* low part is right in big-endian */
-#else
-#  define LWHI	lwr		/* high part is right in little-endian */
-#  define SWHI	swr		/* high part is right in little-endian */
-#  define LWLO	lwl		/* low part is left in big-endian */
-#  define SWLO	swl		/* low part is left in big-endian */
-#endif
-
-LEAF_MIPS32R2(pixman_mips_fast_memcpy)
-
-	slti	AT, a2, 8
-	bne	AT, zero, $last8
-	move	v0, a0	/* memcpy returns the dst pointer */
-
-/* Test if the src and dst are word-aligned, or can be made word-aligned */
-	xor	t8, a1, a0
-	andi	t8, t8, 0x3		/* t8 is a0/a1 word-displacement */
-
-	bne	t8, zero, $unaligned
-	negu	a3, a0
-
-	andi	a3, a3, 0x3	/* we need to copy a3 bytes to make a0/a1 aligned */
-	beq	a3, zero, $chk16w	/* when a3=0 then the dst (a0) is word-aligned */
-	subu	a2, a2, a3	/* now a2 is the remining bytes count */
-
-	LWHI	t8, 0(a1)
-	addu	a1, a1, a3
-	SWHI	t8, 0(a0)
-	addu	a0, a0, a3
-
-/* Now the dst/src are mutually word-aligned with word-aligned addresses */
-$chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
-				/* t8 is the byte count after 64-byte chunks */
-
-	beq	a2, t8, $chk8w	/* if a2==t8, no 64-byte chunks */
-				/* There will be at most 1 32-byte chunk after it */
-	subu	a3, a2, t8	/* subtract from a2 the reminder */
-                                /* Here a3 counts bytes in 16w chunks */
-	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
-
-	addu	t0, a0, a2	/* t0 is the "past the end" address */
-
-/*
- * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
- * the "t0-32" address
- * This means: for x=128 the last "safe" a0 address is "t0-160"
- * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
- * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
- */
-	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
-
-	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
-	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
-	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
-	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
-	sgtu	v1, a0, t9
-	bgtz	v1, $loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
-	nop
-/* otherwise, start with using pref30 */
-	pref	30, 64(a0)
-$loop16w:
-	pref	0, 96(a1)
-	lw	t0, 0(a1)
-	bgtz	v1, $skip_pref30_96	/* skip "pref 30, 96(a0)" */
-	lw	t1, 4(a1)
-	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
-$skip_pref30_96:
-	lw	t2, 8(a1)
-	lw	t3, 12(a1)
-	lw	t4, 16(a1)
-	lw	t5, 20(a1)
-	lw	t6, 24(a1)
-	lw	t7, 28(a1)
-        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-
-	lw	t0, 32(a1)
-	bgtz	v1, $skip_pref30_128	/* skip "pref 30, 128(a0)" */
-	lw	t1, 36(a1)
-	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
-$skip_pref30_128:
-	lw	t2, 40(a1)
-	lw	t3, 44(a1)
-	lw	t4, 48(a1)
-	lw	t5, 52(a1)
-	lw	t6, 56(a1)
-	lw	t7, 60(a1)
-        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
-
-	sw	t0, 32(a0)
-	sw	t1, 36(a0)
-	sw	t2, 40(a0)
-	sw	t3, 44(a0)
-	sw	t4, 48(a0)
-	sw	t5, 52(a0)
-	sw	t6, 56(a0)
-	sw	t7, 60(a0)
-
-	addiu	a0, a0, 64	/* adding 64 to dest */
-	sgtu	v1, a0, t9
-	bne	a0, a3, $loop16w
-	addiu	a1, a1, 64	/* adding 64 to src */
-	move	a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$chk8w:
-	pref 0, 0x0(a1)
-	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
-				/* the t8 is the reminder count past 32-bytes */
-	beq	a2, t8, $chk1w	/* when a2=t8, no 32-byte chunk */
-	 nop
-
-	lw	t0, 0(a1)
-	lw	t1, 4(a1)
-	lw	t2, 8(a1)
-	lw	t3, 12(a1)
-	lw	t4, 16(a1)
-	lw	t5, 20(a1)
-	lw	t6, 24(a1)
-	lw	t7, 28(a1)
-	addiu	a1, a1, 32
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-	addiu	a0, a0, 32
-
-$chk1w:
-	andi	a2, t8, 0x3	/* now a2 is the reminder past 1w chunks */
-	beq	a2, t8, $last8
-	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
-	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$wordCopy_loop:
-	lw	t3, 0(a1)	/* the first t3 may be equal t0 ... optimize? */
-	addiu	a1, a1, 4
-	addiu	a0, a0, 4
-	bne	a0, a3, $wordCopy_loop
-	sw	t3, -4(a0)
-
-/* For the last (<8) bytes */
-$last8:
-	blez	a2, leave
-	addu	a3, a0, a2	/* a3 is the last dst address */
-$last8loop:
-	lb	v1, 0(a1)
-	addiu	a1, a1, 1
-	addiu	a0, a0, 1
-	bne	a0, a3, $last8loop
-	sb	v1, -1(a0)
-
-leave:	j	ra
-	nop
-
-/*
- * UNALIGNED case
- */
-
-$unaligned:
-	/* got here with a3="negu a0" */
-	andi	a3, a3, 0x3	/* test if the a0 is word aligned */
-	beqz	a3, $ua_chk16w
-	subu	a2, a2, a3	/* bytes left after initial a3 bytes */
-
-	LWHI	v1, 0(a1)
-	LWLO	v1, 3(a1)
-	addu	a1, a1, a3	/* a3 may be here 1, 2 or 3 */
-	SWHI	v1, 0(a0)
-	addu	a0, a0, a3	/* below the dst will be word aligned (NOTE1) */
-
-$ua_chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
-				/* t8 is the byte count after 64-byte chunks */
-	beq	a2, t8, $ua_chk8w	/* if a2==t8, no 64-byte chunks */
-				/* There will be at most 1 32-byte chunk after it */
-	subu	a3, a2, t8	/* subtract from a2 the reminder */
-                                /* Here a3 counts bytes in 16w chunks */
-	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
-
-	addu	t0, a0, a2	/* t0 is the "past the end" address */
-
-	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
-
-	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
-	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
-	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
-	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
-	sgtu	v1, a0, t9
-	bgtz	v1, $ua_loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
-	nop
-/* otherwise,  start with using pref30 */
-	pref	30, 64(a0)
-$ua_loop16w:
-	pref	0, 96(a1)
-	LWHI	t0, 0(a1)
-	LWLO	t0, 3(a1)
-	LWHI	t1, 4(a1)
-	bgtz	v1, $ua_skip_pref30_96
-	LWLO	t1, 7(a1)
-	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
-$ua_skip_pref30_96:
-	LWHI	t2, 8(a1)
-	LWLO	t2, 11(a1)
-	LWHI	t3, 12(a1)
-	LWLO	t3, 15(a1)
-	LWHI	t4, 16(a1)
-	LWLO	t4, 19(a1)
-	LWHI	t5, 20(a1)
-	LWLO	t5, 23(a1)
-	LWHI	t6, 24(a1)
-	LWLO	t6, 27(a1)
-	LWHI	t7, 28(a1)
-	LWLO	t7, 31(a1)
-        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-
-	LWHI	t0, 32(a1)
-	LWLO	t0, 35(a1)
-	LWHI	t1, 36(a1)
-	bgtz	v1, $ua_skip_pref30_128
-	LWLO	t1, 39(a1)
-	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
-$ua_skip_pref30_128:
-	LWHI	t2, 40(a1)
-	LWLO	t2, 43(a1)
-	LWHI	t3, 44(a1)
-	LWLO	t3, 47(a1)
-	LWHI	t4, 48(a1)
-	LWLO	t4, 51(a1)
-	LWHI	t5, 52(a1)
-	LWLO	t5, 55(a1)
-	LWHI	t6, 56(a1)
-	LWLO	t6, 59(a1)
-	LWHI	t7, 60(a1)
-	LWLO	t7, 63(a1)
-        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
-
-	sw	t0, 32(a0)
-	sw	t1, 36(a0)
-	sw	t2, 40(a0)
-	sw	t3, 44(a0)
-	sw	t4, 48(a0)
-	sw	t5, 52(a0)
-	sw	t6, 56(a0)
-	sw	t7, 60(a0)
-
-	addiu	a0, a0, 64	/* adding 64 to dest */
-	sgtu	v1, a0, t9
-	bne	a0, a3, $ua_loop16w
-	addiu	a1, a1, 64	/* adding 64 to src */
-	move	a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$ua_chk8w:
-	pref 0, 0x0(a1)
-	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
-				/* the t8 is the reminder count */
-	beq	a2, t8, $ua_chk1w	/* when a2=t8, no 32-byte chunk */
-
-	LWHI	t0, 0(a1)
-	LWLO	t0, 3(a1)
-	LWHI	t1, 4(a1)
-	LWLO	t1, 7(a1)
-	LWHI	t2, 8(a1)
-	LWLO	t2, 11(a1)
-	LWHI	t3, 12(a1)
-	LWLO	t3, 15(a1)
-	LWHI	t4, 16(a1)
-	LWLO	t4, 19(a1)
-	LWHI	t5, 20(a1)
-	LWLO	t5, 23(a1)
-	LWHI	t6, 24(a1)
-	LWLO	t6, 27(a1)
-	LWHI	t7, 28(a1)
-	LWLO	t7, 31(a1)
-	addiu	a1, a1, 32
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-	addiu	a0, a0, 32
-
-$ua_chk1w:
-	andi	a2, t8, 0x3	/* now a2 is the reminder past 1w chunks */
-	beq	a2, t8, $ua_smallCopy
-	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
-	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$ua_wordCopy_loop:
-	LWHI	v1, 0(a1)
-	LWLO	v1, 3(a1)
-	addiu	a1, a1, 4
-	addiu	a0, a0, 4		/* note: dst=a0 is word aligned here, see NOTE1 */
-	bne	a0, a3, $ua_wordCopy_loop
-	sw	v1, -4(a0)
-
-/* Now less than 4 bytes (value in a2) left to copy */
-$ua_smallCopy:
-	beqz	a2, leave
-	addu	a3, a0, a2	/* a3 is the last dst address */
-$ua_smallCopy_loop:
-	lb	v1, 0(a1)
-	addiu	a1, a1, 1
-	addiu	a0, a0, 1
-	bne	a0, a3, $ua_smallCopy_loop
-	sb	v1, -1(a0)
-
-	j	ra
-	nop
-
-END(pixman_mips_fast_memcpy)
diff --git a/vendor/pixman/pixman/pixman-mips.c b/vendor/pixman/pixman/pixman-mips.c
deleted file mode 100644
index 7479a0884..000000000
--- a/vendor/pixman/pixman/pixman-mips.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI)
-
-#include <string.h>
-#include <stdlib.h>
-
-static pixman_bool_t
-have_feature (const char *search_string)
-{
-#if defined (__linux__) /* linux ELF */
-    /* Simple detection of MIPS features at runtime for Linux.
-     * It is based on /proc/cpuinfo, which reveals hardware configuration
-     * to user-space applications.  According to MIPS (early 2010), no similar
-     * facility is universally available on the MIPS architectures, so it's up
-     * to individual OSes to provide such.
-     */
-    const char *file_name = "/proc/cpuinfo";
-    char cpuinfo_line[256];
-    FILE *f = NULL;
-
-    if ((f = fopen (file_name, "r")) == NULL)
-        return FALSE;
-
-    while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
-    {
-        if (strstr (cpuinfo_line, search_string) != NULL)
-        {
-            fclose (f);
-            return TRUE;
-        }
-    }
-
-    fclose (f);
-#endif
-
-    /* Did not find string in the proc file, or not Linux ELF. */
-    return FALSE;
-}
-
-#endif
-
-pixman_implementation_t *
-_pixman_mips_get_implementations (pixman_implementation_t *imp)
-{
-#ifdef USE_LOONGSON_MMI
-    /* I really don't know if some Loongson CPUs don't have MMI. */
-    if (!_pixman_disabled ("loongson-mmi") && have_feature ("Loongson"))
-	imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_MIPS_DSPR2
-    if (!_pixman_disabled ("mips-dspr2"))
-    {
-	int already_compiling_everything_for_dspr2 = 0;
-#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-	already_compiling_everything_for_dspr2 = 1;
-#endif
-	if (already_compiling_everything_for_dspr2 ||
-	    /* Only currently available MIPS core that supports DSPr2 is 74K. */
-	    have_feature ("MIPS 74K"))
-	{
-	    imp = _pixman_implementation_create_mips_dspr2 (imp);
-	}
-    }
-#endif
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-mmx.c b/vendor/pixman/pixman/pixman-mmx.c
deleted file mode 100644
index 3a85616b7..000000000
--- a/vendor/pixman/pixman/pixman-mmx.c
+++ /dev/null
@@ -1,4153 +0,0 @@
-/*
- * Copyright © 2004, 2005 Red Hat, Inc.
- * Copyright © 2004 Nicholas Miell
- * Copyright © 2005 Trolltech AS
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Søren Sandmann (sandmann@redhat.com)
- * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
- * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
- *
- * Based on work by Owen Taylor
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
-
-#ifdef USE_LOONGSON_MMI
-#include <loongson-mmintrin.h>
-#else
-#include <mmintrin.h>
-#endif
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-
-#ifdef VERBOSE
-#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
-#else
-#define CHECKPOINT()
-#endif
-
-#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8
-/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_empty (void)
-{
-
-}
-#endif
-
-#ifdef USE_X86_MMX
-# if (defined(__SSE2__) || defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
-#  include <xmmintrin.h>
-# else
-/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
- * instructions to be generated that we don't want. Just duplicate the
- * functions we want to use.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
-    int ret;
-
-    asm ("pmovmskb %1, %0\n\t"
-	: "=r" (ret)
-	: "y" (__A)
-    );
-
-    return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
-    asm ("pmulhuw %1, %0\n\t"
-	: "+y" (__A)
-	: "y" (__B)
-    );
-    return __A;
-}
-
-# define _mm_shuffle_pi16(A, N)						\
-    ({									\
-	__m64 ret;							\
-									\
-	asm ("pshufw %2, %1, %0\n\t"					\
-	     : "=y" (ret)						\
-	     : "y" (A), "K" ((const int8_t)N)				\
-	);								\
-									\
-	ret;								\
-    })
-# endif
-#endif
-
-#ifndef _MSC_VER
-#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
- (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
-#endif
-
-/* Notes about writing mmx code
- *
- * give memory operands as the second operand. If you give it as the
- * first, gcc will first load it into a register, then use that
- * register
- *
- *   ie. use
- *
- *         _mm_mullo_pi16 (x, mmx_constant);
- *
- *   not
- *
- *         _mm_mullo_pi16 (mmx_constant, x);
- *
- * Also try to minimize dependencies. i.e. when you need a value, try
- * to calculate it from a value that was calculated as early as
- * possible.
- */
-
-/* --------------- MMX primitives ------------------------------------- */
-
-/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
- * the name of the member used to access the data.
- * If __m64 requires using mm_cvt* intrinsics functions to convert between
- * uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
- * If __m64 and uint64_t values can just be cast to each other directly,
- * then define USE_M64_CASTS.
- * If __m64 is a double datatype, then define USE_M64_DOUBLE.
- */
-#ifdef _MSC_VER
-# define M64_MEMBER m64_u64
-#elif defined(__ICC)
-# define USE_CVT_INTRINSICS
-#elif defined(USE_LOONGSON_MMI)
-# define USE_M64_DOUBLE
-#elif defined(__GNUC__)
-# define USE_M64_CASTS
-#elif defined(__SUNPRO_C)
-# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
-/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
- * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
- * is defined.   If it is used, then the mm_cvt* intrinsics must be used.
- */
-#  define USE_CVT_INTRINSICS
-# else
-/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
- * disabled, __m64 is defined as a struct containing "unsigned long long l_".
- */
-#  define M64_MEMBER l_
-# endif
-#endif
-
-#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE)
-typedef uint64_t mmxdatafield;
-#else
-typedef __m64 mmxdatafield;
-#endif
-
-typedef struct
-{
-    mmxdatafield mmx_4x00ff;
-    mmxdatafield mmx_4x0080;
-    mmxdatafield mmx_565_rgb;
-    mmxdatafield mmx_565_unpack_multiplier;
-    mmxdatafield mmx_565_pack_multiplier;
-    mmxdatafield mmx_565_r;
-    mmxdatafield mmx_565_g;
-    mmxdatafield mmx_565_b;
-    mmxdatafield mmx_packed_565_rb;
-    mmxdatafield mmx_packed_565_g;
-    mmxdatafield mmx_expand_565_g;
-    mmxdatafield mmx_expand_565_b;
-    mmxdatafield mmx_expand_565_r;
-#ifndef USE_LOONGSON_MMI
-    mmxdatafield mmx_mask_0;
-    mmxdatafield mmx_mask_1;
-    mmxdatafield mmx_mask_2;
-    mmxdatafield mmx_mask_3;
-#endif
-    mmxdatafield mmx_full_alpha;
-    mmxdatafield mmx_4x0101;
-    mmxdatafield mmx_ff000000;
-} mmx_data_t;
-
-#if defined(_MSC_VER)
-# define MMXDATA_INIT(field, val) { val ## UI64 }
-#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
-# define MMXDATA_INIT(field, val) field =   { val ## ULL }
-#else                           /* mmxdatafield is an integral type */
-# define MMXDATA_INIT(field, val) field =   val ## ULL
-#endif
-
-static const mmx_data_t c =
-{
-    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
-    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
-    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
-    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
-    MMXDATA_INIT (.mmx_565_pack_multiplier,      0x2000000420000004),
-    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
-    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
-    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
-    MMXDATA_INIT (.mmx_packed_565_rb,            0x00f800f800f800f8),
-    MMXDATA_INIT (.mmx_packed_565_g,             0x0000fc000000fc00),
-    MMXDATA_INIT (.mmx_expand_565_g,             0x07e007e007e007e0),
-    MMXDATA_INIT (.mmx_expand_565_b,             0x001f001f001f001f),
-    MMXDATA_INIT (.mmx_expand_565_r,             0xf800f800f800f800),
-#ifndef USE_LOONGSON_MMI
-    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
-    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
-    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
-    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
-#endif
-    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
-    MMXDATA_INIT (.mmx_4x0101,                   0x0101010101010101),
-    MMXDATA_INIT (.mmx_ff000000,                 0xff000000ff000000),
-};
-
-#ifdef USE_CVT_INTRINSICS
-#    define MC(x) to_m64 (c.mmx_ ## x)
-#elif defined(USE_M64_CASTS)
-#    define MC(x) ((__m64)c.mmx_ ## x)
-#elif defined(USE_M64_DOUBLE)
-#    define MC(x) (*(__m64 *)&c.mmx_ ## x)
-#else
-#    define MC(x) c.mmx_ ## x
-#endif
-
-static force_inline __m64
-to_m64 (uint64_t x)
-{
-#ifdef USE_CVT_INTRINSICS
-    return _mm_cvtsi64_m64 (x);
-#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
-    __m64 res;
-
-    res.M64_MEMBER = x;
-    return res;
-#elif defined USE_M64_DOUBLE
-    return *(__m64 *)&x;
-#else /* USE_M64_CASTS */
-    return (__m64)x;
-#endif
-}
-
-static force_inline uint64_t
-to_uint64 (__m64 x)
-{
-#ifdef USE_CVT_INTRINSICS
-    return _mm_cvtm64_si64 (x);
-#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
-    uint64_t res = x.M64_MEMBER;
-    return res;
-#elif defined USE_M64_DOUBLE
-    return *(uint64_t *)&x;
-#else /* USE_M64_CASTS */
-    return (uint64_t)x;
-#endif
-}
-
-static force_inline __m64
-shift (__m64 v,
-       int   s)
-{
-    if (s > 0)
-	return _mm_slli_si64 (v, s);
-    else if (s < 0)
-	return _mm_srli_si64 (v, -s);
-    else
-	return v;
-}
-
-static force_inline __m64
-negate (__m64 mask)
-{
-    return _mm_xor_si64 (mask, MC (4x00ff));
-}
-
-/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1
- * and maps its result to the same range.
- *
- * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
- * Notation, Notation, Notation", the first of which is
- *
- *   prod(a, b) = (a * b + 128) / 255.
- *
- * By approximating the division by 255 as 257/65536 it can be replaced by a
- * multiply and a right shift. This is the implementation that we use in
- * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended
- * 3DNow!, and unavailable at the time of the book's publication) to perform
- * the multiplication and right shift in a single operation.
- *
- *   prod(a, b) = ((a * b + 128) * 257) >> 16.
- *
- * A third way (how pix_multiply() was implemented prior to 14208344) exists
- * also that performs the multiplication by 257 with adds and shifts.
- *
- * Where temp = a * b + 128
- *
- *   prod(a, b) = (temp + (temp >> 8)) >> 8.
- */
-static force_inline __m64
-pix_multiply (__m64 a, __m64 b)
-{
-    __m64 res;
-
-    res = _mm_mullo_pi16 (a, b);
-    res = _mm_adds_pu16 (res, MC (4x0080));
-    res = _mm_mulhi_pu16 (res, MC (4x0101));
-
-    return res;
-}
-
-static force_inline __m64
-pix_add (__m64 a, __m64 b)
-{
-    return _mm_adds_pu8 (a, b);
-}
-
-static force_inline __m64
-expand_alpha (__m64 pixel)
-{
-    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline __m64
-expand_alpha_rev (__m64 pixel)
-{
-    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline __m64
-invert_colors (__m64 pixel)
-{
-    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
-static force_inline __m64
-over (__m64 src,
-      __m64 srca,
-      __m64 dest)
-{
-    return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca)));
-}
-
-static force_inline __m64
-over_rev_non_pre (__m64 src, __m64 dest)
-{
-    __m64 srca = expand_alpha (src);
-    __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha));
-
-    return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest);
-}
-
-static force_inline __m64
-in (__m64 src, __m64 mask)
-{
-    return pix_multiply (src, mask);
-}
-
-#ifndef _MSC_VER
-static force_inline __m64
-in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
-{
-    return over (in (src, mask), pix_multiply (srca, mask), dest);
-}
-
-#else
-
-#define in_over(src, srca, mask, dest)					\
-    over (in (src, mask), pix_multiply (srca, mask), dest)
-
-#endif
-
-/* Elemental unaligned loads */
-
-static force_inline __m64 ldq_u(__m64 *p)
-{
-#ifdef USE_X86_MMX
-    /* x86's alignment restrictions are very relaxed, but that's no excuse */
-    __m64 r;
-    memcpy(&r, p, sizeof(__m64));
-    return r;
-#elif defined USE_ARM_IWMMXT
-    int align = (uintptr_t)p & 7;
-    __m64 *aligned_p;
-    if (align == 0)
-	return *p;
-    aligned_p = (__m64 *)((uintptr_t)p & ~7);
-    return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align);
-#else
-    struct __una_u64 { __m64 x __attribute__((packed)); };
-    const struct __una_u64 *ptr = (const struct __una_u64 *) p;
-    return (__m64) ptr->x;
-#endif
-}
-
-static force_inline uint32_t ldl_u(const uint32_t *p)
-{
-#ifdef USE_X86_MMX
-    /* x86's alignment restrictions are very relaxed. */
-    uint32_t r;
-    memcpy(&r, p, sizeof(uint32_t));
-    return r;
-#else
-    struct __una_u32 { uint32_t x __attribute__((packed)); };
-    const struct __una_u32 *ptr = (const struct __una_u32 *) p;
-    return ptr->x;
-#endif
-}
-
-static force_inline __m64
-load (const uint32_t *v)
-{
-#ifdef USE_LOONGSON_MMI
-    __m64 ret;
-    asm ("lwc1 %0, %1\n\t"
-	: "=f" (ret)
-	: "m" (*v)
-    );
-    return ret;
-#else
-    return _mm_cvtsi32_si64 (*v);
-#endif
-}
-
-static force_inline __m64
-load8888 (const uint32_t *v)
-{
-#ifdef USE_LOONGSON_MMI
-    return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ());
-#else
-    return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ());
-#endif
-}
-
-static force_inline __m64
-load8888u (const uint32_t *v)
-{
-    uint32_t l = ldl_u (v);
-    return load8888 (&l);
-}
-
-static force_inline __m64
-pack8888 (__m64 lo, __m64 hi)
-{
-    return _mm_packs_pu16 (lo, hi);
-}
-
-static force_inline void
-store (uint32_t *dest, __m64 v)
-{
-#ifdef USE_LOONGSON_MMI
-    asm ("swc1 %1, %0\n\t"
-	: "=m" (*dest)
-	: "f" (v)
-	: "memory"
-    );
-#else
-    *dest = _mm_cvtsi64_si32 (v);
-#endif
-}
-
-static force_inline void
-store8888 (uint32_t *dest, __m64 v)
-{
-    v = pack8888 (v, _mm_setzero_si64 ());
-    store (dest, v);
-}
-
-static force_inline pixman_bool_t
-is_equal (__m64 a, __m64 b)
-{
-#ifdef USE_LOONGSON_MMI
-    /* __m64 is double, we can compare directly. */
-    return a == b;
-#else
-    return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
-#endif
-}
-
-static force_inline pixman_bool_t
-is_opaque (__m64 v)
-{
-#ifdef USE_LOONGSON_MMI
-    return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha));
-#else
-    __m64 ffs = _mm_cmpeq_pi8 (v, v);
-    return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
-#endif
-}
-
-static force_inline pixman_bool_t
-is_zero (__m64 v)
-{
-    return is_equal (v, _mm_setzero_si64 ());
-}
-
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- *    00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565 (__m64 pixel, int pos)
-{
-    __m64 p = pixel;
-    __m64 t1, t2;
-
-    /* move pixel to low 16 bit and zero the rest */
-#ifdef USE_LOONGSON_MMI
-    p = loongson_extract_pi16 (p, pos);
-#else
-    p = shift (shift (p, (3 - pos) * 16), -48);
-#endif
-
-    t1 = shift (p, 36 - 11);
-    t2 = shift (p, 16 - 5);
-
-    p = _mm_or_si64 (t1, p);
-    p = _mm_or_si64 (t2, p);
-    p = _mm_and_si64 (p, MC (565_rgb));
-
-    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
-    return _mm_srli_pi16 (pixel, 8);
-}
-
-/* Expand 4 16 bit pixels in an mmx register into two mmx registers of
- *
- *    AARRGGBBRRGGBB
- */
-static force_inline void
-expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
-{
-    __m64 t0, t1, alpha = _mm_setzero_si64 ();
-    __m64 r = _mm_and_si64 (vin, MC (expand_565_r));
-    __m64 g = _mm_and_si64 (vin, MC (expand_565_g));
-    __m64 b = _mm_and_si64 (vin, MC (expand_565_b));
-    if (full_alpha)
-	alpha = _mm_cmpeq_pi32 (alpha, alpha);
-
-    /* Replicate high bits into empty low bits. */
-    r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13));
-    g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9));
-    b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2));
-
-    r = _mm_packs_pu16 (r, _mm_setzero_si64 ());	/* 00 00 00 00 R3 R2 R1 R0 */
-    g = _mm_packs_pu16 (g, _mm_setzero_si64 ());	/* 00 00 00 00 G3 G2 G1 G0 */
-    b = _mm_packs_pu16 (b, _mm_setzero_si64 ());	/* 00 00 00 00 B3 B2 B1 B0 */
-
-    t1 = _mm_unpacklo_pi8 (r, alpha);			/* A3 R3 A2 R2 A1 R1 A0 R0 */
-    t0 = _mm_unpacklo_pi8 (b, g);			/* G3 B3 G2 B2 G1 B1 G0 B0 */
-
-    *vout0 = _mm_unpacklo_pi16 (t0, t1);		/* A1 R1 G1 B1 A0 R0 G0 B0 */
-    *vout1 = _mm_unpackhi_pi16 (t0, t1);		/* A3 R3 G3 B3 A2 R2 G2 B2 */
-}
-
-static force_inline __m64
-expand8888 (__m64 in, int pos)
-{
-    if (pos == 0)
-	return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ());
-    else
-	return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expandx888 (__m64 in, int pos)
-{
-    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
-}
-
-static force_inline void
-expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha)
-{
-    __m64 v0, v1;
-    expand_4xpacked565 (vin, &v0, &v1, full_alpha);
-    *vout0 = expand8888 (v0, 0);
-    *vout1 = expand8888 (v0, 1);
-    *vout2 = expand8888 (v1, 0);
-    *vout3 = expand8888 (v1, 1);
-}
-
-static force_inline __m64
-pack_565 (__m64 pixel, __m64 target, int pos)
-{
-    __m64 p = pixel;
-    __m64 t = target;
-    __m64 r, g, b;
-
-    r = _mm_and_si64 (p, MC (565_r));
-    g = _mm_and_si64 (p, MC (565_g));
-    b = _mm_and_si64 (p, MC (565_b));
-
-#ifdef USE_LOONGSON_MMI
-    r = shift (r, -(32 - 8));
-    g = shift (g, -(16 - 3));
-    b = shift (b, -(0  + 3));
-
-    p = _mm_or_si64 (r, g);
-    p = _mm_or_si64 (p, b);
-    return loongson_insert_pi16 (t, p, pos);
-#else
-    r = shift (r, -(32 - 8) + pos * 16);
-    g = shift (g, -(16 - 3) + pos * 16);
-    b = shift (b, -(0  + 3) + pos * 16);
-
-    if (pos == 0)
-	t = _mm_and_si64 (t, MC (mask_0));
-    else if (pos == 1)
-	t = _mm_and_si64 (t, MC (mask_1));
-    else if (pos == 2)
-	t = _mm_and_si64 (t, MC (mask_2));
-    else if (pos == 3)
-	t = _mm_and_si64 (t, MC (mask_3));
-
-    p = _mm_or_si64 (r, t);
-    p = _mm_or_si64 (g, p);
-
-    return _mm_or_si64 (b, p);
-#endif
-}
-
-static force_inline __m64
-pack_4xpacked565 (__m64 a, __m64 b)
-{
-    __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb));
-    __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb));
-
-    __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier));
-    __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier));
-
-    __m64 g0 = _mm_and_si64 (a, MC (packed_565_g));
-    __m64 g1 = _mm_and_si64 (b, MC (packed_565_g));
-
-    t0 = _mm_or_si64 (t0, g0);
-    t1 = _mm_or_si64 (t1, g1);
-
-    t0 = shift(t0, -5);
-#ifdef USE_ARM_IWMMXT
-    t1 = shift(t1, -5);
-    return _mm_packs_pu32 (t0, t1);
-#else
-    t1 = shift(t1, -5 + 16);
-    return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
-#endif
-}
-
-#ifndef _MSC_VER
-
-static force_inline __m64
-pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
-{
-    return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
-}
-
-static force_inline __m64
-pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
-{
-    x = pix_multiply (x, a);
-    y = pix_multiply (y, b);
-
-    return pix_add (x, y);
-}
-
-#else
-
-/* MSVC only handles a "pass by register" of up to three SSE intrinsics */
-
-#define pack_4x565(v0, v1, v2, v3) \
-    pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3))
-
-#define pix_add_mul(x, a, y, b)	 \
-    ( x = pix_multiply (x, a),	 \
-      y = pix_multiply (y, b),	 \
-      pix_add (x, y) )
-
-#endif
-
-/* --------------- MMX code patch for fbcompose.c --------------------- */
-
-static force_inline __m64
-combine (const uint32_t *src, const uint32_t *mask)
-{
-    __m64 vsrc = load8888 (src);
-
-    if (mask)
-    {
-	__m64 m = load8888 (mask);
-
-	m = expand_alpha (m);
-	vsrc = pix_multiply (vsrc, m);
-    }
-
-    return vsrc;
-}
-
-static force_inline __m64
-core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst)
-{
-    vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
-
-    if (is_opaque (vsrc))
-    {
-	return vsrc;
-    }
-    else if (!is_zero (vsrc))
-    {
-	return over (vsrc, expand_alpha (vsrc),
-		     _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()));
-    }
-
-    return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ());
-}
-
-static void
-mmx_combine_over_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 vsrc = combine (src, mask);
-
-	if (is_opaque (vsrc))
-	{
-	    store8888 (dest, vsrc);
-	}
-	else if (!is_zero (vsrc))
-	{
-	    __m64 sa = expand_alpha (vsrc);
-	    store8888 (dest, over (vsrc, sa, load8888 (dest)));
-	}
-
-	++dest;
-	++src;
-	if (mask)
-	    ++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 d, da;
-	__m64 s = combine (src, mask);
-
-	d = load8888 (dest);
-	da = expand_alpha (d);
-	store8888 (dest, over (d, da, s));
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_u (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  uint32_t *               dest,
-                  const uint32_t *         src,
-                  const uint32_t *         mask,
-                  int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 a;
-	__m64 x = combine (src, mask);
-
-	a = load8888 (dest);
-	a = expand_alpha (a);
-	x = pix_multiply (x, a);
-
-	store8888 (dest, x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_reverse_u (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *               dest,
-                          const uint32_t *         src,
-                          const uint32_t *         mask,
-                          int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 a = combine (src, mask);
-	__m64 x;
-
-	x = load8888 (dest);
-	a = expand_alpha (a);
-	x = pix_multiply (x, a);
-	store8888 (dest, x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 a;
-	__m64 x = combine (src, mask);
-
-	a = load8888 (dest);
-	a = expand_alpha (a);
-	a = negate (a);
-	x = pix_multiply (x, a);
-	store8888 (dest, x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_reverse_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 a = combine (src, mask);
-	__m64 x;
-
-	x = load8888 (dest);
-	a = expand_alpha (a);
-	a = negate (a);
-	x = pix_multiply (x, a);
-
-	store8888 (dest, x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 da, d, sia;
-	__m64 s = combine (src, mask);
-
-	d = load8888 (dest);
-	sia = expand_alpha (s);
-	sia = negate (sia);
-	da = expand_alpha (d);
-	s = pix_add_mul (s, da, d, sia);
-	store8888 (dest, s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end;
-
-    end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 dia, d, sa;
-	__m64 s = combine (src, mask);
-
-	d = load8888 (dest);
-	sa = expand_alpha (s);
-	dia = expand_alpha (d);
-	dia = negate (dia);
-	s = pix_add_mul (s, dia, d, sa);
-	store8888 (dest, s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_xor_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 dia, d, sia;
-	__m64 s = combine (src, mask);
-
-	d = load8888 (dest);
-	sia = expand_alpha (s);
-	dia = expand_alpha (d);
-	sia = negate (sia);
-	dia = negate (dia);
-	s = pix_add_mul (s, dia, d, sia);
-	store8888 (dest, s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_add_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 d;
-	__m64 s = combine (src, mask);
-
-	d = load8888 (dest);
-	s = pix_add (s, d);
-	store8888 (dest, s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_saturate_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	uint32_t s, sa, da;
-	uint32_t d = *dest;
-	__m64 ms = combine (src, mask);
-	__m64 md = load8888 (dest);
-
-	store8888(&s, ms);
-	da = ~d >> 24;
-	sa = s >> 24;
-
-	if (sa > da)
-	{
-	    uint32_t quot = DIV_UN8 (da, sa) << 24;
-	    __m64 msa = load8888 (&quot);
-	    msa = expand_alpha (msa);
-	    ms = pix_multiply (ms, msa);
-	}
-
-	md = pix_add (md, ms);
-	store8888 (dest, md);
-
-	++src;
-	++dest;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_src_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-
-	s = pix_multiply (s, a);
-	store8888 (dest, s);
-
-	++src;
-	++mask;
-	++dest;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 sa = expand_alpha (s);
-
-	store8888 (dest, in_over (s, sa, a, d));
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-
-	store8888 (dest, over (d, da, in (s, a)));
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_ca (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-
-	s = pix_multiply (s, a);
-	s = pix_multiply (s, da);
-	store8888 (dest, s);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 sa = expand_alpha (s);
-
-	a = pix_multiply (a, sa);
-	d = pix_multiply (d, a);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-
-	da = negate (da);
-	s = pix_multiply (s, a);
-	s = pix_multiply (s, da);
-	store8888 (dest, s);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 sa = expand_alpha (s);
-
-	a = pix_multiply (a, sa);
-	a = negate (a);
-	d = pix_multiply (d, a);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	a = negate (a);
-	d = pix_add_mul (d, a, s, da);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	da = negate (da);
-	d = pix_add_mul (d, a, s, da);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_xor_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	da = negate (da);
-	a = negate (a);
-	d = pix_add_mul (d, a, s, da);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_add_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (mask);
-	__m64 s = load8888 (src);
-	__m64 d = load8888 (dest);
-
-	s = pix_multiply (s, a);
-	d = pix_add (s, d);
-	store8888 (dest, d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-/* ------------- MMX code paths called from fbpict.c -------------------- */
-
-static void
-mmx_composite_over_n_8888 (pixman_implementation_t *imp,
-                           pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, *dst;
-    int32_t w;
-    int dst_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 2)
-	{
-	    __m64 vdest;
-	    __m64 dest0, dest1;
-
-	    vdest = *(__m64 *)dst;
-
-	    dest0 = over (vsrc, vsrca, expand8888 (vdest, 0));
-	    dest1 = over (vsrc, vsrca, expand8888 (vdest, 1));
-
-	    *(__m64 *)dst = pack8888 (dest0, dest1);
-
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	if (w)
-	{
-	    store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_0565 (pixman_implementation_t *imp,
-                           pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint16_t    *dst_line, *dst;
-    int32_t w;
-    int dst_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vdest = *(__m64 *)dst;
-	    __m64 v0, v1, v2, v3;
-
-	    expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
-	    v0 = over (vsrc, vsrca, v0);
-	    v1 = over (vsrc, vsrca, v1);
-	    v2 = over (vsrc, vsrca, v2);
-	    v3 = over (vsrc, vsrca, v3);
-
-	    *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
-
-	    dst += 4;
-	    w -= 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-                                   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line;
-    uint32_t    *mask_line;
-    int dst_stride, mask_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	int twidth = width;
-	uint32_t *p = (uint32_t *)mask_line;
-	uint32_t *q = (uint32_t *)dst_line;
-
-	while (twidth && (uintptr_t)q & 7)
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (q);
-		vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
-		store8888 (q, vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	while (twidth >= 2)
-	{
-	    uint32_t m0, m1;
-	    m0 = *p;
-	    m1 = *(p + 1);
-
-	    if (m0 | m1)
-	    {
-		__m64 dest0, dest1;
-		__m64 vdest = *(__m64 *)q;
-
-		dest0 = in_over (vsrc, vsrca, load8888 (&m0),
-		                 expand8888 (vdest, 0));
-		dest1 = in_over (vsrc, vsrca, load8888 (&m1),
-		                 expand8888 (vdest, 1));
-
-		*(__m64 *)q = pack8888 (dest0, dest1);
-	    }
-
-	    p += 2;
-	    q += 2;
-	    twidth -= 2;
-	}
-
-	if (twidth)
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (q);
-		vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
-		store8888 (q, vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-                                pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    uint32_t mask;
-    __m64 vmask;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
-    vmask = expand_alpha (load8888 (&mask));
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    __m64 s = load8888 (src);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 2)
-	{
-	    __m64 vs = ldq_u ((__m64 *)src);
-	    __m64 vd = *(__m64 *)dst;
-	    __m64 vsrc0 = expand8888 (vs, 0);
-	    __m64 vsrc1 = expand8888 (vs, 1);
-
-	    *(__m64 *)dst = pack8888 (
-	        in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
-	        in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
-
-	    w -= 2;
-	    dst += 2;
-	    src += 2;
-	}
-
-	if (w)
-	{
-	    __m64 s = load8888 (src);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-                                pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t mask;
-    __m64 vmask;
-    int dst_stride, src_stride;
-    int32_t w;
-    __m64 srca;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
-
-    vmask = expand_alpha (load8888 (&mask));
-    srca = MC (4x00ff);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint32_t ssrc = *src | 0xff000000;
-	    __m64 s = load8888 (&ssrc);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, in_over (s, srca, vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 16)
-	{
-	    __m64 vd0 = *(__m64 *)(dst + 0);
-	    __m64 vd1 = *(__m64 *)(dst + 2);
-	    __m64 vd2 = *(__m64 *)(dst + 4);
-	    __m64 vd3 = *(__m64 *)(dst + 6);
-	    __m64 vd4 = *(__m64 *)(dst + 8);
-	    __m64 vd5 = *(__m64 *)(dst + 10);
-	    __m64 vd6 = *(__m64 *)(dst + 12);
-	    __m64 vd7 = *(__m64 *)(dst + 14);
-
-	    __m64 vs0 = ldq_u ((__m64 *)(src + 0));
-	    __m64 vs1 = ldq_u ((__m64 *)(src + 2));
-	    __m64 vs2 = ldq_u ((__m64 *)(src + 4));
-	    __m64 vs3 = ldq_u ((__m64 *)(src + 6));
-	    __m64 vs4 = ldq_u ((__m64 *)(src + 8));
-	    __m64 vs5 = ldq_u ((__m64 *)(src + 10));
-	    __m64 vs6 = ldq_u ((__m64 *)(src + 12));
-	    __m64 vs7 = ldq_u ((__m64 *)(src + 14));
-
-	    vd0 = pack8888 (
-	        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
-	        in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
-
-	    vd1 = pack8888 (
-	        in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
-	        in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
-
-	    vd2 = pack8888 (
-	        in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
-	        in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
-
-	    vd3 = pack8888 (
-	        in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
-	        in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
-
-	    vd4 = pack8888 (
-	        in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
-	        in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
-
-	    vd5 = pack8888 (
-	        in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
-	        in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
-
-	    vd6 = pack8888 (
-	        in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
-	        in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
-
-	    vd7 = pack8888 (
-	        in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
-	        in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
-
-	    *(__m64 *)(dst + 0) = vd0;
-	    *(__m64 *)(dst + 2) = vd1;
-	    *(__m64 *)(dst + 4) = vd2;
-	    *(__m64 *)(dst + 6) = vd3;
-	    *(__m64 *)(dst + 8) = vd4;
-	    *(__m64 *)(dst + 10) = vd5;
-	    *(__m64 *)(dst + 12) = vd6;
-	    *(__m64 *)(dst + 14) = vd7;
-
-	    w -= 16;
-	    dst += 16;
-	    src += 16;
-	}
-
-	while (w)
-	{
-	    uint32_t ssrc = *src | 0xff000000;
-	    __m64 s = load8888 (&ssrc);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, in_over (s, srca, vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t s;
-    int dst_stride, src_stride;
-    uint8_t a;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    a = s >> 24;
-
-	    if (a == 0xff)
-	    {
-		*dst = s;
-	    }
-	    else if (s)
-	    {
-		__m64 ms, sa;
-		ms = load8888 (&s);
-		sa = expand_alpha (ms);
-		store8888 (dst, over (ms, sa, load8888 (dst)));
-	    }
-
-	    dst++;
-	}
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    __m64 vsrc = load8888 (src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (
-		over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    __m64 vdest = *(__m64 *)dst;
-	    __m64 v0, v1, v2, v3;
-	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
-
-	    expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
-	    vsrc0 = load8888 ((src + 0));
-	    vsrc1 = load8888 ((src + 1));
-	    vsrc2 = load8888 ((src + 2));
-	    vsrc3 = load8888 ((src + 3));
-
-	    v0 = over (vsrc0, expand_alpha (vsrc0), v0);
-	    v1 = over (vsrc1, expand_alpha (vsrc1), v1);
-	    v2 = over (vsrc2, expand_alpha (vsrc2), v2);
-	    v3 = over (vsrc3, expand_alpha (vsrc3), v3);
-
-	    *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    __m64 vsrc = load8888 (src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc, vsrca;
-    uint64_t srcsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    srcsrc = (uint64_t)src << 32 | src;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = in_over (vsrc, vsrca,
-				       expand_alpha_rev (to_m64 (m)),
-				       load8888 (dst));
-
-		store8888 (dst, vdest);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 2)
-	{
-	    uint64_t m0, m1;
-
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-
-	    if (srca == 0xff && (m0 & m1) == 0xff)
-	    {
-		*(uint64_t *)dst = srcsrc;
-	    }
-	    else if (m0 | m1)
-	    {
-		__m64 vdest;
-		__m64 dest0, dest1;
-
-		vdest = *(__m64 *)dst;
-
-		dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)),
-				 expand8888 (vdest, 0));
-		dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)),
-				 expand8888 (vdest, 1));
-
-		*(__m64 *)dst = pack8888 (dest0, dest1);
-	    }
-
-	    mask += 2;
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	if (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (dst);
-
-		vdest = in_over (
-		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
-		store8888 (dst, vdest);
-	    }
-	}
-    }
-
-    _mm_empty ();
-}
-
-static pixman_bool_t
-mmx_fill (pixman_implementation_t *imp,
-          uint32_t *               bits,
-          int                      stride,
-          int                      bpp,
-          int                      x,
-          int                      y,
-          int                      width,
-          int                      height,
-          uint32_t		   filler)
-{
-    uint64_t fill;
-    __m64 vfill;
-    uint32_t byte_width;
-    uint8_t     *byte_line;
-
-#if defined __GNUC__ && defined USE_X86_MMX
-    __m64 v1, v2, v3, v4, v5, v6, v7;
-#endif
-
-    if (bpp != 16 && bpp != 32 && bpp != 8)
-	return FALSE;
-
-    if (bpp == 8)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 1;
-	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
-	byte_width = width;
-	stride *= 1;
-        filler = (filler & 0xff) * 0x01010101;
-    }
-    else if (bpp == 16)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 2;
-	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-	byte_width = 2 * width;
-	stride *= 2;
-        filler = (filler & 0xffff) * 0x00010001;
-    }
-    else
-    {
-	stride = stride * (int) sizeof (uint32_t) / 4;
-	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-	byte_width = 4 * width;
-	stride *= 4;
-    }
-
-    fill = ((uint64_t)filler << 32) | filler;
-    vfill = to_m64 (fill);
-
-#if defined __GNUC__ && defined USE_X86_MMX
-    __asm__ (
-        "movq		%7,	%0\n"
-        "movq		%7,	%1\n"
-        "movq		%7,	%2\n"
-        "movq		%7,	%3\n"
-        "movq		%7,	%4\n"
-        "movq		%7,	%5\n"
-        "movq		%7,	%6\n"
-	: "=&y" (v1), "=&y" (v2), "=&y" (v3),
-	  "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7)
-	: "y" (vfill));
-#endif
-
-    while (height--)
-    {
-	int w;
-	uint8_t *d = byte_line;
-
-	byte_line += stride;
-	w = byte_width;
-
-	if (w >= 1 && ((uintptr_t)d & 1))
-	{
-	    *(uint8_t *)d = (filler & 0xff);
-	    w--;
-	    d++;
-	}
-
-	if (w >= 2 && ((uintptr_t)d & 3))
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((uintptr_t)d & 7))
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	while (w >= 64)
-	{
-#if defined __GNUC__ && defined USE_X86_MMX
-	    __asm__ (
-	        "movq	%1,	  (%0)\n"
-	        "movq	%2,	 8(%0)\n"
-	        "movq	%3,	16(%0)\n"
-	        "movq	%4,	24(%0)\n"
-	        "movq	%5,	32(%0)\n"
-	        "movq	%6,	40(%0)\n"
-	        "movq	%7,	48(%0)\n"
-	        "movq	%8,	56(%0)\n"
-		:
-		: "r" (d),
-		  "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
-		  "y" (v4), "y" (v5), "y" (v6), "y" (v7)
-		: "memory");
-#else
-	    *(__m64*) (d +  0) = vfill;
-	    *(__m64*) (d +  8) = vfill;
-	    *(__m64*) (d + 16) = vfill;
-	    *(__m64*) (d + 24) = vfill;
-	    *(__m64*) (d + 32) = vfill;
-	    *(__m64*) (d + 40) = vfill;
-	    *(__m64*) (d + 48) = vfill;
-	    *(__m64*) (d + 56) = vfill;
-#endif
-	    w -= 64;
-	    d += 64;
-	}
-
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-	if (w >= 2)
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-	if (w >= 1)
-	{
-	    *(uint8_t *)d = (filler & 0xff);
-	    w--;
-	    d++;
-	}
-
-    }
-
-    _mm_empty ();
-    return TRUE;
-}
-
-static void
-mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    s = *src++;
-	    *dst = convert_8888_to_0565 (s);
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vdest;
-	    __m64 vsrc0 = ldq_u ((__m64 *)(src + 0));
-	    __m64 vsrc1 = ldq_u ((__m64 *)(src + 2));
-
-	    vdest = pack_4xpacked565 (vsrc0, vsrc1);
-
-	    *(__m64 *)dst = vdest;
-
-	    w -= 4;
-	    src += 4;
-	    dst += 4;
-	}
-
-	while (w)
-	{
-	    s = *src++;
-	    *dst = convert_8888_to_0565 (s);
-	    dst++;
-	    w--;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
-                            pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t    *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc;
-    uint64_t srcsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-    {
-	mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
-		  PIXMAN_FORMAT_BPP (dest_image->bits.format),
-		  dest_x, dest_y, width, height, 0);
-	return;
-    }
-
-    srcsrc = (uint64_t)src << 32 | src;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (&src);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
-
-		store8888 (dst, vdest);
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 2)
-	{
-	    uint64_t m0, m1;
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-
-	    if (srca == 0xff && (m0 & m1) == 0xff)
-	    {
-		*(uint64_t *)dst = srcsrc;
-	    }
-	    else if (m0 | m1)
-	    {
-		__m64 dest0, dest1;
-
-		dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
-		dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
-
-		*(__m64 *)dst = pack8888 (dest0, dest1);
-	    }
-	    else
-	    {
-		*(uint64_t *)dst = 0;
-	    }
-
-	    mask += 2;
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	if (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (dst);
-
-		vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
-		store8888 (dst, vdest);
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint16_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc, vsrca, tmp;
-    __m64 srcsrcsrcsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
-    srcsrcsrcsrc = expand_alpha_rev (tmp);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		uint64_t d = *dst;
-		__m64 vd = to_m64 (d);
-		__m64 vdest = in_over (
-		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0));
-
-		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
-		*dst = to_uint64 (vd);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    uint64_t m0, m1, m2, m3;
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-	    m2 = *(mask + 2);
-	    m3 = *(mask + 3);
-
-	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
-	    {
-		*(__m64 *)dst = srcsrcsrcsrc;
-	    }
-	    else if (m0 | m1 | m2 | m3)
-	    {
-		__m64 vdest = *(__m64 *)dst;
-		__m64 v0, v1, v2, v3;
-		__m64 vm0, vm1, vm2, vm3;
-
-		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
-		vm0 = to_m64 (m0);
-		v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0);
-
-		vm1 = to_m64 (m1);
-		v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1);
-
-		vm2 = to_m64 (m2);
-		v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2);
-
-		vm3 = to_m64 (m3);
-		v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3);
-
-		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);;
-	    }
-
-	    w -= 4;
-	    mask += 4;
-	    dst += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		uint64_t d = *dst;
-		__m64 vd = to_m64 (d);
-		__m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)),
-				       expand565 (vd, 0));
-		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
-		*dst = to_uint64 (vd);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
-                                pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    __m64 vsrc = load8888 (src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    uint32_t s0, s1, s2, s3;
-	    unsigned char a0, a1, a2, a3;
-
-	    s0 = *src;
-	    s1 = *(src + 1);
-	    s2 = *(src + 2);
-	    s3 = *(src + 3);
-
-	    a0 = (s0 >> 24);
-	    a1 = (s1 >> 24);
-	    a2 = (s2 >> 24);
-	    a3 = (s3 >> 24);
-
-	    if ((a0 & a1 & a2 & a3) == 0xFF)
-	    {
-		__m64 v0 = invert_colors (load8888 (&s0));
-		__m64 v1 = invert_colors (load8888 (&s1));
-		__m64 v2 = invert_colors (load8888 (&s2));
-		__m64 v3 = invert_colors (load8888 (&s3));
-
-		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
-	    }
-	    else if (s0 | s1 | s2 | s3)
-	    {
-		__m64 vdest = *(__m64 *)dst;
-		__m64 v0, v1, v2, v3;
-
-		__m64 vsrc0 = load8888 (&s0);
-		__m64 vsrc1 = load8888 (&s1);
-		__m64 vsrc2 = load8888 (&s2);
-		__m64 vsrc3 = load8888 (&s3);
-
-		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
-		v0 = over_rev_non_pre (vsrc0, v0);
-		v1 = over_rev_non_pre (vsrc1, v1);
-		v2 = over_rev_non_pre (vsrc2, v2);
-		v3 = over_rev_non_pre (vsrc3, v3);
-
-		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    __m64 vsrc = load8888 (src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
-                                pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    __m64 s = load8888 (src);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, over_rev_non_pre (s, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 2)
-	{
-	    uint32_t s0, s1;
-	    unsigned char a0, a1;
-	    __m64 d0, d1;
-
-	    s0 = *src;
-	    s1 = *(src + 1);
-
-	    a0 = (s0 >> 24);
-	    a1 = (s1 >> 24);
-
-	    if ((a0 & a1) == 0xFF)
-	    {
-		d0 = invert_colors (load8888 (&s0));
-		d1 = invert_colors (load8888 (&s1));
-
-		*(__m64 *)dst = pack8888 (d0, d1);
-	    }
-	    else if (s0 | s1)
-	    {
-		__m64 vdest = *(__m64 *)dst;
-
-		d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
-		d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
-
-		*(__m64 *)dst = pack8888 (d0, d1);
-	    }
-
-	    w -= 2;
-	    dst += 2;
-	    src += 2;
-	}
-
-	if (w)
-	{
-	    __m64 s = load8888 (src);
-	    __m64 d = load8888 (dst);
-
-	    store8888 (dst, over_rev_non_pre (s, d));
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-                                   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint16_t    *dst_line;
-    uint32_t    *mask_line;
-    int dst_stride, mask_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	int twidth = width;
-	uint32_t *p = (uint32_t *)mask_line;
-	uint16_t *q = (uint16_t *)dst_line;
-
-	while (twidth && ((uintptr_t)q & 7))
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		uint64_t d = *q;
-		__m64 vdest = expand565 (to_m64 (d), 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
-		*q = to_uint64 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	while (twidth >= 4)
-	{
-	    uint32_t m0, m1, m2, m3;
-
-	    m0 = *p;
-	    m1 = *(p + 1);
-	    m2 = *(p + 2);
-	    m3 = *(p + 3);
-
-	    if ((m0 | m1 | m2 | m3))
-	    {
-		__m64 vdest = *(__m64 *)q;
-		__m64 v0, v1, v2, v3;
-
-		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
-		v0 = in_over (vsrc, vsrca, load8888 (&m0), v0);
-		v1 = in_over (vsrc, vsrca, load8888 (&m1), v1);
-		v2 = in_over (vsrc, vsrca, load8888 (&m2), v2);
-		v3 = in_over (vsrc, vsrca, load8888 (&m3), v3);
-
-		*(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
-	    }
-	    twidth -= 4;
-	    p += 4;
-	    q += 4;
-	}
-
-	while (twidth)
-	{
-	    uint32_t m;
-
-	    m = *(uint32_t *)p;
-	    if (m)
-	    {
-		uint64_t d = *q;
-		__m64 vdest = expand565 (to_m64 (d), 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
-		*q = to_uint64 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	mask_line += mask_stride;
-	dst_line += dst_stride;
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
-                        pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint8_t sa;
-    __m64 vsrc, vsrca;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    sa = src >> 24;
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    uint16_t tmp;
-	    uint8_t a;
-	    uint32_t m, d;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    d = MUL_UN8 (m, d, tmp);
-
-	    *dst++ = d;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vmask;
-	    __m64 vdest;
-
-	    vmask = load8888u ((uint32_t *)mask);
-	    vdest = load8888 ((uint32_t *)dst);
-
-	    store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
-
-	    dst += 4;
-	    mask += 4;
-	    w -= 4;
-	}
-
-	while (w--)
-	{
-	    uint16_t tmp;
-	    uint8_t a;
-	    uint32_t m, d;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    d = MUL_UN8 (m, d, tmp);
-
-	    *dst++ = d;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_in_8_8 (pixman_implementation_t *imp,
-                      pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int src_stride, dst_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 3)
-	{
-	    uint8_t s, d;
-	    uint16_t tmp;
-
-	    s = *src;
-	    d = *dst;
-
-	    *dst = MUL_UN8 (s, d, tmp);
-
-	    src++;
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    uint32_t *s = (uint32_t *)src;
-	    uint32_t *d = (uint32_t *)dst;
-
-	    store8888 (d, in (load8888u (s), load8888 (d)));
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	while (w--)
-	{
-	    uint8_t s, d;
-	    uint16_t tmp;
-
-	    s = *src;
-	    d = *dst;
-
-	    *dst = MUL_UN8 (s, d, tmp);
-
-	    src++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
-			 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint8_t sa;
-    __m64 vsrc, vsrca;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    sa = src >> 24;
-
-    if (src == 0)
-	return;
-
-    vsrc = load8888 (&src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 3)
-	{
-	    uint16_t tmp;
-	    uint16_t a;
-	    uint32_t m, d;
-	    uint32_t r;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    r = ADD_UN8 (m, d, tmp);
-
-	    *dst++ = r;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vmask;
-	    __m64 vdest;
-
-	    vmask = load8888u ((uint32_t *)mask);
-	    vdest = load8888 ((uint32_t *)dst);
-
-	    store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
-
-	    dst += 4;
-	    mask += 4;
-	    w -= 4;
-	}
-
-	while (w--)
-	{
-	    uint16_t tmp;
-	    uint16_t a;
-	    uint32_t m, d;
-	    uint32_t r;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    r = ADD_UN8 (m, d, tmp);
-
-	    *dst++ = r;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_8_8 (pixman_implementation_t *imp,
-		       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t *dst_line, *dst;
-    uint8_t *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint8_t s, d;
-    uint16_t t;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    s = *src;
-	    d = *dst;
-	    t = d + s;
-	    s = t | (0 - (t >> 8));
-	    *dst = s;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-
-	while (w >= 8)
-	{
-	    *(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
-	    dst += 8;
-	    src += 8;
-	    w -= 8;
-	}
-
-	while (w)
-	{
-	    s = *src;
-	    d = *dst;
-	    t = d + s;
-	    s = t | (0 - (t >> 8));
-	    *dst = s;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t	d;
-    uint16_t    *src_line, *src;
-    uint32_t	s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    s = *src++;
-	    if (s)
-	    {
-		d = *dst;
-		s = convert_0565_to_8888 (s);
-		if (d)
-		{
-		    d = convert_0565_to_8888 (d);
-		    UN8x4_ADD_UN8x4 (s, d);
-		}
-		*dst = convert_8888_to_0565 (s);
-	    }
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vdest = *(__m64 *)dst;
-	    __m64 vsrc = ldq_u ((__m64 *)src);
-	    __m64 vd0, vd1;
-	    __m64 vs0, vs1;
-
-	    expand_4xpacked565 (vdest, &vd0, &vd1, 0);
-	    expand_4xpacked565 (vsrc, &vs0, &vs1, 0);
-
-	    vd0 = _mm_adds_pu8 (vd0, vs0);
-	    vd1 = _mm_adds_pu8 (vd1, vs1);
-
-	    *(__m64 *)dst = pack_4xpacked565 (vd0, vd1);
-
-	    dst += 4;
-	    src += 4;
-	    w -= 4;
-	}
-
-	while (w--)
-	{
-	    s = *src++;
-	    if (s)
-	    {
-		d = *dst;
-		s = convert_0565_to_8888 (s);
-		if (d)
-		{
-		    d = convert_0565_to_8888 (d);
-		    UN8x4_ADD_UN8x4 (s, d);
-		}
-		*dst = convert_8888_to_0565 (s);
-	    }
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
-	                              load ((const uint32_t *)dst)));
-	    dst++;
-	    src++;
-	    w--;
-	}
-
-	while (w >= 2)
-	{
-	    *(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
-	    dst += 2;
-	    src += 2;
-	    w -= 2;
-	}
-
-	if (w)
-	{
-	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
-	                              load ((const uint32_t *)dst)));
-
-	}
-    }
-
-    _mm_empty ();
-}
-
-static pixman_bool_t
-mmx_blt (pixman_implementation_t *imp,
-         uint32_t *               src_bits,
-         uint32_t *               dst_bits,
-         int                      src_stride,
-         int                      dst_stride,
-         int                      src_bpp,
-         int                      dst_bpp,
-         int                      src_x,
-         int                      src_y,
-         int                      dest_x,
-         int                      dest_y,
-         int                      width,
-         int                      height)
-{
-    uint8_t *   src_bytes;
-    uint8_t *   dst_bytes;
-    int byte_width;
-
-    if (src_bpp != dst_bpp)
-	return FALSE;
-
-    if (src_bpp == 16)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
-	byte_width = 2 * width;
-	src_stride *= 2;
-	dst_stride *= 2;
-    }
-    else if (src_bpp == 32)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
-	byte_width = 4 * width;
-	src_stride *= 4;
-	dst_stride *= 4;
-    }
-    else
-    {
-	return FALSE;
-    }
-
-    while (height--)
-    {
-	int w;
-	uint8_t *s = src_bytes;
-	uint8_t *d = dst_bytes;
-	src_bytes += src_stride;
-	dst_bytes += dst_stride;
-	w = byte_width;
-
-	if (w >= 1 && ((uintptr_t)d & 1))
-	{
-	    *(uint8_t *)d = *(uint8_t *)s;
-	    w -= 1;
-	    s += 1;
-	    d += 1;
-	}
-
-	if (w >= 2 && ((uintptr_t)d & 3))
-	{
-	    *(uint16_t *)d = *(uint16_t *)s;
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((uintptr_t)d & 7))
-	{
-	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-
-	while (w >= 64)
-	{
-#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
-	    __asm__ (
-	        "movq	  (%1),	  %%mm0\n"
-	        "movq	 8(%1),	  %%mm1\n"
-	        "movq	16(%1),	  %%mm2\n"
-	        "movq	24(%1),	  %%mm3\n"
-	        "movq	32(%1),	  %%mm4\n"
-	        "movq	40(%1),	  %%mm5\n"
-	        "movq	48(%1),	  %%mm6\n"
-	        "movq	56(%1),	  %%mm7\n"
-
-	        "movq	%%mm0,	  (%0)\n"
-	        "movq	%%mm1,	 8(%0)\n"
-	        "movq	%%mm2,	16(%0)\n"
-	        "movq	%%mm3,	24(%0)\n"
-	        "movq	%%mm4,	32(%0)\n"
-	        "movq	%%mm5,	40(%0)\n"
-	        "movq	%%mm6,	48(%0)\n"
-	        "movq	%%mm7,	56(%0)\n"
-		:
-		: "r" (d), "r" (s)
-		: "memory",
-		  "%mm0", "%mm1", "%mm2", "%mm3",
-		  "%mm4", "%mm5", "%mm6", "%mm7");
-#else
-	    __m64 v0 = ldq_u ((__m64 *)(s + 0));
-	    __m64 v1 = ldq_u ((__m64 *)(s + 8));
-	    __m64 v2 = ldq_u ((__m64 *)(s + 16));
-	    __m64 v3 = ldq_u ((__m64 *)(s + 24));
-	    __m64 v4 = ldq_u ((__m64 *)(s + 32));
-	    __m64 v5 = ldq_u ((__m64 *)(s + 40));
-	    __m64 v6 = ldq_u ((__m64 *)(s + 48));
-	    __m64 v7 = ldq_u ((__m64 *)(s + 56));
-	    *(__m64 *)(d + 0)  = v0;
-	    *(__m64 *)(d + 8)  = v1;
-	    *(__m64 *)(d + 16) = v2;
-	    *(__m64 *)(d + 24) = v3;
-	    *(__m64 *)(d + 32) = v4;
-	    *(__m64 *)(d + 40) = v5;
-	    *(__m64 *)(d + 48) = v6;
-	    *(__m64 *)(d + 56) = v7;
-#endif
-
-	    w -= 64;
-	    s += 64;
-	    d += 64;
-	}
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-	if (w >= 2)
-	{
-	    *(uint16_t *)d = *(uint16_t *)s;
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-    }
-
-    _mm_empty ();
-
-    return TRUE;
-}
-
-static void
-mmx_composite_copy_area (pixman_implementation_t *imp,
-                         pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-
-    mmx_blt (imp, src_image->bits.bits,
-	     dest_image->bits.bits,
-	     src_image->bits.rowstride,
-	     dest_image->bits.rowstride,
-	     PIXMAN_FORMAT_BPP (src_image->bits.format),
-	     PIXMAN_FORMAT_BPP (dest_image->bits.format),
-	     src_x, src_y, dest_x, dest_y, width, height);
-}
-
-static void
-mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-                                pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t  *src, *src_line;
-    uint32_t  *dst, *dst_line;
-    uint8_t  *mask, *mask_line;
-    int src_stride, mask_stride, dst_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	src = src_line;
-	src_line += src_stride;
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-
-	w = width;
-
-	while (w--)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		uint32_t ssrc = *src | 0xff000000;
-		__m64 s = load8888 (&ssrc);
-
-		if (m == 0xff)
-		{
-		    store8888 (dst, s);
-		}
-		else
-		{
-		    __m64 sa = expand_alpha (s);
-		    __m64 vm = expand_alpha_rev (to_m64 (m));
-		    __m64 vdest = in_over (s, sa, vm, load8888 (dst));
-
-		    store8888 (dst, vdest);
-		}
-	    }
-
-	    mask++;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
-                                   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, *dst;
-    int32_t w;
-    int dst_stride;
-    __m64 vsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    vsrc = load8888 (&src);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (uintptr_t)dst & 7)
-	{
-	    __m64 vdest = load8888 (dst);
-
-	    store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 2)
-	{
-	    __m64 vdest = *(__m64 *)dst;
-	    __m64 dest0 = expand8888 (vdest, 0);
-	    __m64 dest1 = expand8888 (vdest, 1);
-
-
-	    dest0 = over (dest0, expand_alpha (dest0), vsrc);
-	    dest1 = over (dest1, expand_alpha (dest1), vsrc);
-
-	    *(__m64 *)dst = pack8888 (dest0, dest1);
-
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	if (w)
-	{
-	    __m64 vdest = load8888 (dst);
-
-	    store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
-	}
-    }
-
-    _mm_empty ();
-}
-
-static force_inline void
-scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t*       pd,
-                                            const uint32_t* ps,
-                                            int32_t         w,
-                                            pixman_fixed_t  vx,
-                                            pixman_fixed_t  unit_x,
-                                            pixman_fixed_t  src_width_fixed,
-                                            pixman_bool_t   fully_transparent_src)
-{
-    if (fully_transparent_src)
-	return;
-
-    while (w)
-    {
-	__m64 d = load (pd);
-	__m64 s = load (ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
-	pd++;
-
-	w--;
-    }
-
-    _mm_empty ();
-}
-
-FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER,
-		       scaled_nearest_scanline_mmx_8888_8888_OVER,
-		       uint32_t, uint32_t, COVER)
-FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER,
-		       scaled_nearest_scanline_mmx_8888_8888_OVER,
-		       uint32_t, uint32_t, NONE)
-FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER,
-		       scaled_nearest_scanline_mmx_8888_8888_OVER,
-		       uint32_t, uint32_t, PAD)
-FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER,
-		       scaled_nearest_scanline_mmx_8888_8888_OVER,
-		       uint32_t, uint32_t, NORMAL)
-
-static force_inline void
-scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
-					      uint32_t *       dst,
-					      const uint32_t * src,
-					      int32_t          w,
-					      pixman_fixed_t   vx,
-					      pixman_fixed_t   unit_x,
-					      pixman_fixed_t   src_width_fixed,
-					      pixman_bool_t    zero_src)
-{
-    __m64 mm_mask;
-
-    if (zero_src || (*mask >> 24) == 0)
-    {
-	/* A workaround for https://gcc.gnu.org/PR47759 */
-	_mm_empty ();
-	return;
-    }
-
-    mm_mask = expand_alpha (load8888 (mask));
-
-    while (w)
-    {
-	uint32_t s = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	if (s)
-	{
-	    __m64 ms = load8888 (&s);
-	    __m64 alpha = expand_alpha (ms);
-	    __m64 dest  = load8888 (dst);
-
-	    store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
-	}
-
-	dst++;
-	w--;
-    }
-
-    _mm_empty ();
-}
-
-FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
-			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
-			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
-			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
-			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
-
-#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
-#define BMSK (BSHIFT - 1)
-
-#define BILINEAR_DECLARE_VARIABLES						\
-    const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt);				\
-    const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb);				\
-    const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1);				\
-    const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK);			\
-    const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x);		\
-    const __m64 mm_zero = _mm_setzero_si64 ();					\
-    __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
-
-#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)					\
-do {										\
-    /* fetch 2x2 pixel block into 2 mmx registers */				\
-    __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]);		\
-    __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]);		\
-    /* vertical interpolation */						\
-    __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt);		\
-    __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt);		\
-    __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb);		\
-    __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb);		\
-    __m64 hi = _mm_add_pi16 (t_hi, b_hi);					\
-    __m64 lo = _mm_add_pi16 (t_lo, b_lo);					\
-    /* calculate horizontal weights */						\
-    __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7,		\
-			  _mm_srli_pi16 (mm_x,					\
-					 16 - BILINEAR_INTERPOLATION_BITS)));	\
-    /* horizontal interpolation */						\
-    __m64 p = _mm_unpacklo_pi16 (lo, hi);					\
-    __m64 q = _mm_unpackhi_pi16 (lo, hi);					\
-    vx += unit_x;								\
-    lo = _mm_madd_pi16 (p, mm_wh);						\
-    hi = _mm_madd_pi16 (q, mm_wh);						\
-    mm_x = _mm_add_pi16 (mm_x, mm_ux);						\
-    /* shift and pack the result */						\
-    hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2);			\
-    lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2);			\
-    lo = _mm_packs_pi32 (lo, hi);						\
-    lo = _mm_packs_pu16 (lo, lo);						\
-    pix = lo;									\
-} while (0)
-
-#define BILINEAR_SKIP_ONE_PIXEL()						\
-do {										\
-    vx += unit_x;								\
-    mm_x = _mm_add_pi16 (mm_x, mm_ux);						\
-} while(0)
-
-static force_inline void
-scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t *       dst,
-					    const uint32_t * mask,
-					    const uint32_t * src_top,
-					    const uint32_t * src_bottom,
-					    int32_t          w,
-					    int              wt,
-					    int              wb,
-					    pixman_fixed_t   vx,
-					    pixman_fixed_t   unit_x,
-					    pixman_fixed_t   max_vx,
-					    pixman_bool_t    zero_src)
-{
-    BILINEAR_DECLARE_VARIABLES;
-    __m64 pix;
-
-    while (w--)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix);
-	store (dst, pix);
-	dst++;
-    }
-
-    _mm_empty ();
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC,
-			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC,
-			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC,
-			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC,
-			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_NONE)
-
-static force_inline void
-scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t *       dst,
-					     const uint32_t * mask,
-					     const uint32_t * src_top,
-					     const uint32_t * src_bottom,
-					     int32_t          w,
-					     int              wt,
-					     int              wb,
-					     pixman_fixed_t   vx,
-					     pixman_fixed_t   unit_x,
-					     pixman_fixed_t   max_vx,
-					     pixman_bool_t    zero_src)
-{
-    BILINEAR_DECLARE_VARIABLES;
-    __m64 pix1, pix2;
-
-    while (w)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
-	if (!is_zero (pix1))
-	{
-	    pix2 = load (dst);
-	    store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
-	}
-
-	w--;
-	dst++;
-    }
-
-    _mm_empty ();
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_NONE)
-
-static force_inline void
-scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t *       dst,
-					       const uint8_t  * mask,
-					       const uint32_t * src_top,
-					       const uint32_t * src_bottom,
-					       int32_t          w,
-					       int              wt,
-					       int              wb,
-					       pixman_fixed_t   vx,
-					       pixman_fixed_t   unit_x,
-					       pixman_fixed_t   max_vx,
-					       pixman_bool_t    zero_src)
-{
-    BILINEAR_DECLARE_VARIABLES;
-    __m64 pix1, pix2;
-    uint32_t m;
-
-    while (w)
-    {
-	m = (uint32_t) *mask++;
-
-	if (m)
-	{
-	    BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
-	    if (m == 0xff && is_opaque (pix1))
-	    {
-		store (dst, pix1);
-	    }
-	    else
-	    {
-		__m64 ms, md, ma, msa;
-
-		pix2 = load (dst);
-		ma = expand_alpha_rev (to_m64 (m));
-		ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ());
-		md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ());
-
-		msa = expand_alpha (ms);
-
-		store8888 (dst, (in_over (ms, msa, ma, md)));
-	    }
-	}
-	else
-	{
-	    BILINEAR_SKIP_ONE_PIXEL ();
-	}
-
-	w--;
-	dst++;
-    }
-
-    _mm_empty ();
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       COVER, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       PAD, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       NONE, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER,
-			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       NORMAL, FLAG_HAVE_NON_SOLID_MASK)
-
-static uint32_t *
-mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint32_t *src = (uint32_t *)iter->bits;
-
-    iter->bits += iter->stride;
-
-    while (w && ((uintptr_t)dst) & 7)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    while (w >= 8)
-    {
-	__m64 vsrc1 = ldq_u ((__m64 *)(src + 0));
-	__m64 vsrc2 = ldq_u ((__m64 *)(src + 2));
-	__m64 vsrc3 = ldq_u ((__m64 *)(src + 4));
-	__m64 vsrc4 = ldq_u ((__m64 *)(src + 6));
-
-	*(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000));
-	*(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000));
-	*(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000));
-	*(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000));
-
-	dst += 8;
-	src += 8;
-	w -= 8;
-    }
-
-    while (w)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    _mm_empty ();
-    return iter->buffer;
-}
-
-static uint32_t *
-mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint16_t *src = (uint16_t *)iter->bits;
-
-    iter->bits += iter->stride;
-
-    while (w && ((uintptr_t)dst) & 0x0f)
-    {
-	uint16_t s = *src++;
-
-	*dst++ = convert_0565_to_8888 (s);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m64 vsrc = ldq_u ((__m64 *)src);
-	__m64 mm0, mm1;
-
-	expand_4xpacked565 (vsrc, &mm0, &mm1, 1);
-
-	*(__m64 *)(dst + 0) = mm0;
-	*(__m64 *)(dst + 2) = mm1;
-
-	dst += 4;
-	src += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	uint16_t s = *src++;
-
-	*dst++ = convert_0565_to_8888 (s);
-	w--;
-    }
-
-    _mm_empty ();
-    return iter->buffer;
-}
-
-static uint32_t *
-mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint8_t *src = iter->bits;
-
-    iter->bits += iter->stride;
-
-    while (w && (((uintptr_t)dst) & 15))
-    {
-        *dst++ = (uint32_t)*(src++) << 24;
-        w--;
-    }
-
-    while (w >= 8)
-    {
-	__m64 mm0 = ldq_u ((__m64 *)src);
-
-	__m64 mm1 = _mm_unpacklo_pi8  (_mm_setzero_si64(), mm0);
-	__m64 mm2 = _mm_unpackhi_pi8  (_mm_setzero_si64(), mm0);
-	__m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1);
-	__m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1);
-	__m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2);
-	__m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2);
-
-	*(__m64 *)(dst + 0) = mm3;
-	*(__m64 *)(dst + 2) = mm4;
-	*(__m64 *)(dst + 4) = mm5;
-	*(__m64 *)(dst + 6) = mm6;
-
-	dst += 8;
-	src += 8;
-	w -= 8;
-    }
-
-    while (w)
-    {
-	*dst++ = (uint32_t)*(src++) << 24;
-	w--;
-    }
-
-    _mm_empty ();
-    return iter->buffer;
-}
-
-#define IMAGE_FLAGS							\
-    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
-     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
-static const pixman_iter_info_t mmx_iters[] = 
-{
-    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
-    },
-    { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
-    },
-    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
-    },
-    { PIXMAN_null },
-};
-
-static const pixman_fast_path_t mmx_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       r5g6b5,   mmx_composite_over_n_8_0565       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       b5g6r5,   mmx_composite_over_n_8_0565       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8r8g8b8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8r8g8b8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8b8g8r8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8b8g8r8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, r5g6b5,   mmx_composite_over_n_8888_0565_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, b5g6r5,   mmx_composite_over_n_8888_0565_ca ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   a8r8g8b8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   x8r8g8b8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   r5g6b5,   mmx_composite_over_pixbuf_0565    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  a8b8g8r8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  x8b8g8r8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  b5g6r5,   mmx_composite_over_pixbuf_0565    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       x8r8g8b8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, a8,       x8b8g8r8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, a8,       a8b8g8r8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     a8r8g8b8, mmx_composite_over_n_8888         ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     x8r8g8b8, mmx_composite_over_n_8888         ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     r5g6b5,   mmx_composite_over_n_0565         ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     b5g6r5,   mmx_composite_over_n_0565         ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     a8r8g8b8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     x8r8g8b8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     r5g6b5,   mmx_composite_over_8888_0565      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     a8b8g8r8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     x8b8g8r8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     b5g6r5,   mmx_composite_over_8888_0565      ),
-
-    PIXMAN_STD_FAST_PATH    (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH    (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888),
-
-    PIXMAN_STD_FAST_PATH    (ADD,  r5g6b5,   null,     r5g6b5,   mmx_composite_add_0565_0565       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  b5g6r5,   null,     b5g6r5,   mmx_composite_add_0565_0565       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_add_8888_8888       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_add_8888_8888       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
-    PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
-
-    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8b8g8r8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8b8g8r8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  r5g6b5,   null,     r5g6b5,   mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  b5g6r5,   null,     b5g6r5,   mmx_composite_copy_area           ),
-
-    PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
-    PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                            ),
-    SIMPLE_NEAREST_FAST_PATH (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                            ),
-    SIMPLE_NEAREST_FAST_PATH (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                            ),
-    SIMPLE_NEAREST_FAST_PATH (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                            ),
-
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888                 ),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888                 ),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888                 ),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888                 ),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          a8r8g8b8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8,          a8b8g8r8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8,          x8b8g8r8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8,          x8b8g8r8, mmx_8888_8888                     ),
-
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8,         x8r8g8b8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8,         x8b8g8r8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8,         a8r8g8b8, mmx_8888_8888                     ),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8,         a8b8g8r8, mmx_8888_8888                     ),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888                   ),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888                   ),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888                   ),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888                   ),
-
-    { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_mmx (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths);
-
-    imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u;
-    imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u;
-    imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u;
-
-    imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
-
-    imp->blt = mmx_blt;
-    imp->fill = mmx_fill;
-
-    imp->iter_info = mmx_iters;
-
-    return imp;
-}
-
-#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */
diff --git a/vendor/pixman/pixman/pixman-noop.c b/vendor/pixman/pixman/pixman-noop.c
deleted file mode 100644
index e43199bc1..000000000
--- a/vendor/pixman/pixman/pixman-noop.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2011 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-
-static void
-noop_composite (pixman_implementation_t *imp,
-		pixman_composite_info_t *info)
-{
-    return;
-}
-
-static uint32_t *
-noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
-{
-    uint32_t *result = iter->buffer;
-
-    iter->buffer += iter->image->bits.rowstride;
-
-    return result;
-}
-
-static void
-noop_init_solid_narrow (pixman_iter_t *iter,
-			const pixman_iter_info_t *info)
-{ 
-    pixman_image_t *image = iter->image;
-    uint32_t *buffer = iter->buffer;
-    uint32_t *end = buffer + iter->width;
-    uint32_t color;
-
-    if (iter->image->type == SOLID)
-	color = image->solid.color_32;
-    else
-	color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
-
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void
-noop_init_solid_wide (pixman_iter_t *iter,
-		      const pixman_iter_info_t *info)
-{
-    pixman_image_t *image = iter->image;
-    argb_t *buffer = (argb_t *)iter->buffer;
-    argb_t *end = buffer + iter->width;
-    argb_t color;
-
-    if (iter->image->type == SOLID)
-	color = image->solid.color_float;
-    else
-	color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
-
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void
-noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-    pixman_image_t *image = iter->image;
-
-    iter->buffer =
-	image->bits.bits + iter->y * image->bits.rowstride + iter->x;
-}
-
-static void
-dest_write_back_direct (pixman_iter_t *iter)
-{
-    iter->buffer += iter->image->bits.rowstride;
-}
-
-static const pixman_iter_info_t noop_iters[] =
-{
-    /* Source iters */
-    { PIXMAN_any,
-      0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC,
-      NULL,
-      _pixman_iter_get_scanline_noop,
-      NULL
-    },
-    { PIXMAN_solid,
-      FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC,
-      noop_init_solid_narrow,
-      _pixman_iter_get_scanline_noop,
-      NULL,
-    },
-    { PIXMAN_solid,
-      FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC,
-      noop_init_solid_wide,
-      _pixman_iter_get_scanline_noop,
-      NULL
-    },
-    { PIXMAN_a8r8g8b8,
-      FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |
-          FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,
-      ITER_NARROW | ITER_SRC,
-      noop_init_direct_buffer,
-      noop_get_scanline,
-      NULL
-    },
-    /* Dest iters */
-    { PIXMAN_a8r8g8b8,
-      FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST,
-      noop_init_direct_buffer,
-      _pixman_iter_get_scanline_noop,
-      dest_write_back_direct
-    },
-    { PIXMAN_x8r8g8b8,
-      FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA,
-      noop_init_direct_buffer,
-      _pixman_iter_get_scanline_noop,
-      dest_write_back_direct
-    },
-    { PIXMAN_null },
-};
-
-static const pixman_fast_path_t noop_fast_paths[] =
-{
-    { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
-    { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_noop (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp =
-	_pixman_implementation_create (fallback, noop_fast_paths);
- 
-    imp->iter_info = noop_iters;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-ppc.c b/vendor/pixman/pixman/pixman-ppc.c
deleted file mode 100644
index 926eb445f..000000000
--- a/vendor/pixman/pixman/pixman-ppc.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-#ifdef USE_VMX
-
-/* The CPU detection code needs to be in a file not compiled with
- * "-maltivec -mabi=altivec", as gcc would try to save vector register
- * across function calls causing SIGILL on cpus without Altivec/vmx.
- */
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-    int error, have_vmx;
-    size_t length = sizeof(have_vmx);
-
-    error = sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
-
-    if (error)
-	return FALSE;
-
-    return have_vmx;
-}
-
-#elif defined (__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-    int error, have_vmx;
-    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
-    size_t length = sizeof(have_vmx);
-
-    error = sysctl (mib, 2, &have_vmx, &length, NULL, 0);
-
-    if (error != 0)
-	return FALSE;
-
-    return have_vmx;
-}
-
-#elif defined (__FreeBSD__)
-#include <machine/cpu.h>
-#include <sys/auxv.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-
-    unsigned long cpufeatures;
-    int have_vmx;
-
-    if (elf_aux_info(AT_HWCAP, &cpufeatures, sizeof(cpufeatures)))
-    return FALSE;
-
-    have_vmx = cpufeatures & PPC_FEATURE_HAS_ALTIVEC;
-    return have_vmx;
-}
-
-#elif defined (__linux__)
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <linux/auxvec.h>
-#include <asm/cputable.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-    int have_vmx = FALSE;
-    int fd;
-    struct
-    {
-	unsigned long type;
-	unsigned long value;
-    } aux;
-
-    fd = open ("/proc/self/auxv", O_RDONLY);
-    if (fd >= 0)
-    {
-	while (read (fd, &aux, sizeof (aux)) == sizeof (aux))
-	{
-	    if (aux.type == AT_HWCAP && (aux.value & PPC_FEATURE_HAS_ALTIVEC))
-	    {
-		have_vmx = TRUE;
-		break;
-	    }
-	}
-
-	close (fd);
-    }
-
-    return have_vmx;
-}
-
-#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
-#include <signal.h>
-#include <setjmp.h>
-
-static jmp_buf jump_env;
-
-static void
-vmx_test (int        sig,
-	  siginfo_t *si,
-	  void *     unused)
-{
-    longjmp (jump_env, 1);
-}
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-    struct sigaction sa, osa;
-    int jmp_result;
-
-    sa.sa_flags = SA_SIGINFO;
-    sigemptyset (&sa.sa_mask);
-    sa.sa_sigaction = vmx_test;
-    sigaction (SIGILL, &sa, &osa);
-    jmp_result = setjmp (jump_env);
-    if (jmp_result == 0)
-    {
-	asm volatile ( "vor 0, 0, 0" );
-    }
-    sigaction (SIGILL, &osa, NULL);
-    return (jmp_result == 0);
-}
-
-#endif /* __APPLE__ */
-#endif /* USE_VMX */
-
-pixman_implementation_t *
-_pixman_ppc_get_implementations (pixman_implementation_t *imp)
-{
-#ifdef USE_VMX
-    if (!_pixman_disabled ("vmx") && pixman_have_vmx ())
-	imp = _pixman_implementation_create_vmx (imp);
-#endif
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-private.h b/vendor/pixman/pixman/pixman-private.h
deleted file mode 100644
index 7316c801b..000000000
--- a/vendor/pixman/pixman/pixman-private.h
+++ /dev/null
@@ -1,1193 +0,0 @@
-#ifndef PIXMAN_PRIVATE_H
-#define PIXMAN_PRIVATE_H
-
-/*
- * The defines which are shared between C and assembly code
- */
-
-/* bilinear interpolation precision (must be < 8) */
-#define BILINEAR_INTERPOLATION_BITS 7
-#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
-
-/*
- * C specific part
- */
-
-#ifndef __ASSEMBLER__
-
-#ifndef PACKAGE
-#  error config.h must be included before pixman-private.h
-#endif
-
-#define PIXMAN_DISABLE_DEPRECATED
-#define PIXMAN_USE_INTERNAL_API
-
-#include "pixman.h"
-#include <time.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <stddef.h>
-#include <float.h>
-
-#include "pixman-compiler.h"
-
-/*
- * Images
- */
-typedef struct image_common image_common_t;
-typedef struct solid_fill solid_fill_t;
-typedef struct gradient gradient_t;
-typedef struct linear_gradient linear_gradient_t;
-typedef struct horizontal_gradient horizontal_gradient_t;
-typedef struct vertical_gradient vertical_gradient_t;
-typedef struct conical_gradient conical_gradient_t;
-typedef struct radial_gradient radial_gradient_t;
-typedef struct bits_image bits_image_t;
-typedef struct circle circle_t;
-
-typedef struct argb_t argb_t;
-
-struct argb_t
-{
-    float a;
-    float r;
-    float g;
-    float b;
-};
-
-typedef void (*fetch_scanline_t) (bits_image_t   *image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t       *buffer,
-				  const uint32_t *mask);
-
-typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image,
-				      int           x,
-				      int           y);
-
-typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image,
-				       int           x,
-				       int           y);
-
-typedef void (*store_scanline_t) (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *values);
-
-typedef enum
-{
-    BITS,
-    LINEAR,
-    CONICAL,
-    RADIAL,
-    SOLID
-} image_type_t;
-
-typedef void (*property_changed_func_t) (pixman_image_t *image);
-
-struct image_common
-{
-    image_type_t                type;
-    int32_t                     ref_count;
-    pixman_region32_t           clip_region;
-    int32_t			alpha_count;	    /* How many times this image is being used as an alpha map */
-    pixman_bool_t               have_clip_region;   /* FALSE if there is no clip */
-    pixman_bool_t               client_clip;        /* Whether the source clip was
-						       set by a client */
-    pixman_bool_t               clip_sources;       /* Whether the clip applies when
-						     * the image is used as a source
-						     */
-    pixman_bool_t		dirty;
-    pixman_transform_t *        transform;
-    pixman_repeat_t             repeat;
-    pixman_filter_t             filter;
-    pixman_fixed_t *            filter_params;
-    int                         n_filter_params;
-    bits_image_t *              alpha_map;
-    int                         alpha_origin_x;
-    int                         alpha_origin_y;
-    pixman_bool_t               component_alpha;
-    property_changed_func_t     property_changed;
-
-    pixman_image_destroy_func_t destroy_func;
-    void *                      destroy_data;
-
-    uint32_t			flags;
-    pixman_format_code_t	extended_format_code;
-};
-
-struct solid_fill
-{
-    image_common_t common;
-    pixman_color_t color;
-
-    uint32_t	   color_32;
-    argb_t	   color_float;
-};
-
-struct gradient
-{
-    image_common_t	    common;
-    int                     n_stops;
-    pixman_gradient_stop_t *stops;
-};
-
-struct linear_gradient
-{
-    gradient_t           common;
-    pixman_point_fixed_t p1;
-    pixman_point_fixed_t p2;
-};
-
-struct circle
-{
-    pixman_fixed_t x;
-    pixman_fixed_t y;
-    pixman_fixed_t radius;
-};
-
-struct radial_gradient
-{
-    gradient_t common;
-
-    circle_t   c1;
-    circle_t   c2;
-
-    circle_t   delta;
-    double     a;
-    double     inva;
-    double     mindr;
-};
-
-struct conical_gradient
-{
-    gradient_t           common;
-    pixman_point_fixed_t center;
-    double		 angle;
-};
-
-struct bits_image
-{
-    image_common_t             common;
-    pixman_format_code_t       format;
-    const pixman_indexed_t *   indexed;
-    int                        width;
-    int                        height;
-    uint32_t *                 bits;
-    uint32_t *                 free_me;
-    int                        rowstride;  /* in number of uint32_t's */
-
-    pixman_dither_t            dither;
-    uint32_t                   dither_offset_y;
-    uint32_t                   dither_offset_x;
-
-    fetch_scanline_t           fetch_scanline_32;
-    fetch_pixel_32_t	       fetch_pixel_32;
-    store_scanline_t           store_scanline_32;
-
-    fetch_scanline_t	       fetch_scanline_float;
-    fetch_pixel_float_t	       fetch_pixel_float;
-    store_scanline_t           store_scanline_float;
-
-    /* Used for indirect access to the bits */
-    pixman_read_memory_func_t  read_func;
-    pixman_write_memory_func_t write_func;
-};
-
-union pixman_image
-{
-    image_type_t       type;
-    image_common_t     common;
-    bits_image_t       bits;
-    gradient_t         gradient;
-    linear_gradient_t  linear;
-    conical_gradient_t conical;
-    radial_gradient_t  radial;
-    solid_fill_t       solid;
-};
-
-typedef struct pixman_iter_t pixman_iter_t;
-typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
-typedef void      (* pixman_iter_write_back_t)   (pixman_iter_t *iter);
-typedef void	  (* pixman_iter_fini_t)	 (pixman_iter_t *iter);
-
-typedef enum
-{
-    ITER_NARROW =               (1 << 0),
-    ITER_WIDE =                 (1 << 1),
-
-    /* "Localized alpha" is when the alpha channel is used only to compute
-     * the alpha value of the destination. This means that the computation
-     * of the RGB values of the result is independent of the alpha value.
-     *
-     * For example, the OVER operator has localized alpha for the
-     * destination, because the RGB values of the result can be computed
-     * without knowing the destination alpha. Similarly, ADD has localized
-     * alpha for both source and destination because the RGB values of the
-     * result can be computed without knowing the alpha value of source or
-     * destination.
-     *
-     * When he destination is xRGB, this is useful knowledge, because then
-     * we can treat it as if it were ARGB, which means in some cases we can
-     * avoid copying it to a temporary buffer.
-     */
-    ITER_LOCALIZED_ALPHA =	(1 << 2),
-    ITER_IGNORE_ALPHA =		(1 << 3),
-    ITER_IGNORE_RGB =		(1 << 4),
-
-    /* These indicate whether the iterator is for a source
-     * or a destination image
-     */
-    ITER_SRC =			(1 << 5),
-    ITER_DEST =			(1 << 6)
-} iter_flags_t;
-
-struct pixman_iter_t
-{
-    /* These are initialized by _pixman_implementation_{src,dest}_init */
-    pixman_image_t *		image;
-    uint32_t *			buffer;
-    int				x, y;
-    int				width;
-    int				height;
-    iter_flags_t		iter_flags;
-    uint32_t			image_flags;
-
-    /* These function pointers are initialized by the implementation */
-    pixman_iter_get_scanline_t	get_scanline;
-    pixman_iter_write_back_t	write_back;
-    pixman_iter_fini_t          fini;
-
-    /* These fields are scratch data that implementations can use */
-    void *			data;
-    uint8_t *			bits;
-    int				stride;
-};
-
-typedef struct pixman_iter_info_t pixman_iter_info_t;
-typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter,
-                                            const pixman_iter_info_t *info);
-struct pixman_iter_info_t
-{
-    pixman_format_code_t	format;
-    uint32_t			image_flags;
-    iter_flags_t		iter_flags;
-    pixman_iter_initializer_t	initializer;
-    pixman_iter_get_scanline_t	get_scanline;
-    pixman_iter_write_back_t	write_back;
-};
-
-void
-_pixman_bits_image_setup_accessors (bits_image_t *image);
-
-void
-_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter);
-
-void
-_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
-
-void
-_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t  *iter);
-
-void
-_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
-
-void
-_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
-
-void
-_pixman_image_init (pixman_image_t *image);
-
-pixman_bool_t
-_pixman_bits_image_init (pixman_image_t *     image,
-                         pixman_format_code_t format,
-                         int                  width,
-                         int                  height,
-                         uint32_t *           bits,
-                         int                  rowstride,
-			 pixman_bool_t	      clear);
-pixman_bool_t
-_pixman_image_fini (pixman_image_t *image);
-
-pixman_image_t *
-_pixman_image_allocate (void);
-
-pixman_bool_t
-_pixman_init_gradient (gradient_t *                  gradient,
-                       const pixman_gradient_stop_t *stops,
-                       int                           n_stops);
-void
-_pixman_image_reset_clip_region (pixman_image_t *image);
-
-void
-_pixman_image_validate (pixman_image_t *image);
-
-#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul)	\
-    do									\
-    {									\
-	uint32_t *__bits__;						\
-	int       __stride__;						\
-        								\
-	__bits__ = image->bits.bits;					\
-	__stride__ = image->bits.rowstride;				\
-	(out_stride) =							\
-	    __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type);	\
-	(line) =							\
-	    ((type *) __bits__) + (out_stride) * (y) + (mul) * (x);	\
-    } while (0)
-
-/*
- * Gradient walker
- */
-typedef struct
-{
-    float		    a_s, a_b;
-    float		    r_s, r_b;
-    float		    g_s, g_b;
-    float		    b_s, b_b;
-    pixman_fixed_48_16_t    left_x;
-    pixman_fixed_48_16_t    right_x;
-
-    pixman_gradient_stop_t *stops;
-    int                     num_stops;
-    pixman_repeat_t	    repeat;
-
-    pixman_bool_t           need_reset;
-} pixman_gradient_walker_t;
-
-void
-_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
-                              gradient_t *              gradient,
-			      pixman_repeat_t           repeat);
-
-void
-_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
-                               pixman_fixed_48_16_t      pos);
-
-typedef void (*pixman_gradient_walker_write_t) (
-    pixman_gradient_walker_t *walker,
-    pixman_fixed_48_16_t      x,
-    uint32_t                 *buffer);
-
-void
-_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker,
-				     pixman_fixed_48_16_t      x,
-				     uint32_t                 *buffer);
-
-void
-_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker,
-				   pixman_fixed_48_16_t      x,
-				   uint32_t                 *buffer);
-
-typedef void (*pixman_gradient_walker_fill_t) (
-    pixman_gradient_walker_t *walker,
-    pixman_fixed_48_16_t      x,
-    uint32_t                 *buffer,
-    uint32_t                 *end);
-
-void
-_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker,
-				    pixman_fixed_48_16_t      x,
-				    uint32_t                 *buffer,
-				    uint32_t                 *end);
-
-void
-_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker,
-				  pixman_fixed_48_16_t      x,
-				  uint32_t                 *buffer,
-				  uint32_t                 *end);
-
-/*
- * Edges
- */
-
-#define MAX_ALPHA(n)    ((1 << (n)) - 1)
-#define N_Y_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1)
-#define N_X_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1)
-
-#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
-#define STEP_Y_BIG(n)   (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
-
-#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2)
-#define Y_FRAC_LAST(n)  (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
-
-#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
-#define STEP_X_BIG(n)   (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
-
-#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2)
-#define X_FRAC_LAST(n)  (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
-
-#define RENDER_SAMPLES_X(x, n)						\
-    ((n) == 1? 0 : (pixman_fixed_frac (x) +				\
-		    X_FRAC_FIRST (n)) / STEP_X_SMALL (n))
-
-void
-pixman_rasterize_edges_accessors (pixman_image_t *image,
-                                  pixman_edge_t * l,
-                                  pixman_edge_t * r,
-                                  pixman_fixed_t  t,
-                                  pixman_fixed_t  b);
-
-/*
- * Implementations
- */
-typedef struct pixman_implementation_t pixman_implementation_t;
-
-typedef struct
-{
-    pixman_op_t              op;
-    pixman_image_t *         src_image;
-    pixman_image_t *         mask_image;
-    pixman_image_t *         dest_image;
-    int32_t                  src_x;
-    int32_t                  src_y;
-    int32_t                  mask_x;
-    int32_t                  mask_y;
-    int32_t                  dest_x;
-    int32_t                  dest_y;
-    int32_t                  width;
-    int32_t                  height;
-
-    uint32_t                 src_flags;
-    uint32_t                 mask_flags;
-    uint32_t                 dest_flags;
-} pixman_composite_info_t;
-
-#define PIXMAN_COMPOSITE_ARGS(info)					\
-    MAYBE_UNUSED pixman_op_t        op = info->op;			\
-    MAYBE_UNUSED pixman_image_t *   src_image = info->src_image;	\
-    MAYBE_UNUSED pixman_image_t *   mask_image = info->mask_image;	\
-    MAYBE_UNUSED pixman_image_t *   dest_image = info->dest_image;	\
-    MAYBE_UNUSED int32_t            src_x = info->src_x;		\
-    MAYBE_UNUSED int32_t            src_y = info->src_y;		\
-    MAYBE_UNUSED int32_t            mask_x = info->mask_x;		\
-    MAYBE_UNUSED int32_t            mask_y = info->mask_y;		\
-    MAYBE_UNUSED int32_t            dest_x = info->dest_x;		\
-    MAYBE_UNUSED int32_t            dest_y = info->dest_y;		\
-    MAYBE_UNUSED int32_t            width = info->width;		\
-    MAYBE_UNUSED int32_t            height = info->height
-
-typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
-					  pixman_op_t              op,
-					  uint32_t *               dest,
-					  const uint32_t *         src,
-					  const uint32_t *         mask,
-					  int                      width);
-
-typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp,
-					     pixman_op_t	      op,
-					     float *		      dest,
-					     const float *	      src,
-					     const float *	      mask,
-					     int		      n_pixels);
-
-typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
-					 pixman_composite_info_t *info);
-typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
-					    uint32_t *               src_bits,
-					    uint32_t *               dst_bits,
-					    int                      src_stride,
-					    int                      dst_stride,
-					    int                      src_bpp,
-					    int                      dst_bpp,
-					    int                      src_x,
-					    int                      src_y,
-					    int                      dest_x,
-					    int                      dest_y,
-					    int                      width,
-					    int                      height);
-typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
-					     uint32_t *               bits,
-					     int                      stride,
-					     int                      bpp,
-					     int                      x,
-					     int                      y,
-					     int                      width,
-					     int                      height,
-					     uint32_t                 filler);
-
-void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
-void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
-
-typedef struct
-{
-    pixman_op_t             op;
-    pixman_format_code_t    src_format;
-    uint32_t		    src_flags;
-    pixman_format_code_t    mask_format;
-    uint32_t		    mask_flags;
-    pixman_format_code_t    dest_format;
-    uint32_t		    dest_flags;
-    pixman_composite_func_t func;
-} pixman_fast_path_t;
-
-struct pixman_implementation_t
-{
-    pixman_implementation_t *	toplevel;
-    pixman_implementation_t *	fallback;
-    const pixman_fast_path_t *	fast_paths;
-    const pixman_iter_info_t *  iter_info;
-
-    pixman_blt_func_t		blt;
-    pixman_fill_func_t		fill;
-
-    pixman_combine_32_func_t	combine_32[PIXMAN_N_OPERATORS];
-    pixman_combine_32_func_t	combine_32_ca[PIXMAN_N_OPERATORS];
-    pixman_combine_float_func_t	combine_float[PIXMAN_N_OPERATORS];
-    pixman_combine_float_func_t	combine_float_ca[PIXMAN_N_OPERATORS];
-};
-
-uint32_t
-_pixman_image_get_solid (pixman_implementation_t *imp,
-			 pixman_image_t *         image,
-                         pixman_format_code_t     format);
-
-pixman_implementation_t *
-_pixman_implementation_create (pixman_implementation_t *fallback,
-			       const pixman_fast_path_t *fast_paths);
-
-void
-_pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,
-					 pixman_op_t               op,
-					 pixman_format_code_t      src_format,
-					 uint32_t                  src_flags,
-					 pixman_format_code_t      mask_format,
-					 uint32_t                  mask_flags,
-					 pixman_format_code_t      dest_format,
-					 uint32_t                  dest_flags,
-					 pixman_implementation_t **out_imp,
-					 pixman_composite_func_t  *out_func);
-
-pixman_combine_32_func_t
-_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
-					pixman_op_t		 op,
-					pixman_bool_t		 component_alpha,
-					pixman_bool_t		 wide);
-
-pixman_bool_t
-_pixman_implementation_blt (pixman_implementation_t *imp,
-                            uint32_t *               src_bits,
-                            uint32_t *               dst_bits,
-                            int                      src_stride,
-                            int                      dst_stride,
-                            int                      src_bpp,
-                            int                      dst_bpp,
-                            int                      src_x,
-                            int                      src_y,
-                            int                      dest_x,
-                            int                      dest_y,
-                            int                      width,
-                            int                      height);
-
-pixman_bool_t
-_pixman_implementation_fill (pixman_implementation_t *imp,
-                             uint32_t *               bits,
-                             int                      stride,
-                             int                      bpp,
-                             int                      x,
-                             int                      y,
-                             int                      width,
-                             int                      height,
-                             uint32_t                 filler);
-
-void
-_pixman_implementation_iter_init (pixman_implementation_t       *imp,
-                                  pixman_iter_t                 *iter,
-                                  pixman_image_t                *image,
-                                  int                            x,
-                                  int                            y,
-                                  int                            width,
-                                  int                            height,
-                                  uint8_t                       *buffer,
-                                  iter_flags_t                   flags,
-                                  uint32_t                       image_flags);
-
-/* Specific implementations */
-pixman_implementation_t *
-_pixman_implementation_create_general (void);
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
-
-pixman_implementation_t *
-_pixman_implementation_create_noop (pixman_implementation_t *fallback);
-
-#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
-pixman_implementation_t *
-_pixman_implementation_create_mmx (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_SSE2
-pixman_implementation_t *
-_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_SSSE3
-pixman_implementation_t *
-_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_ARM_SIMD
-pixman_implementation_t *
-_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_ARM_NEON
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_ARM_A64_NEON
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_MIPS_DSPR2
-pixman_implementation_t *
-_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback);
-#endif
-
-#ifdef USE_VMX
-pixman_implementation_t *
-_pixman_implementation_create_vmx (pixman_implementation_t *fallback);
-#endif
-
-pixman_bool_t
-_pixman_implementation_disabled (const char *name);
-
-pixman_implementation_t *
-_pixman_x86_get_implementations (pixman_implementation_t *imp);
-
-pixman_implementation_t *
-_pixman_arm_get_implementations (pixman_implementation_t *imp);
-
-pixman_implementation_t *
-_pixman_ppc_get_implementations (pixman_implementation_t *imp);
-
-pixman_implementation_t *
-_pixman_mips_get_implementations (pixman_implementation_t *imp);
-
-pixman_implementation_t *
-_pixman_choose_implementation (void);
-
-pixman_bool_t
-_pixman_disabled (const char *name);
-
-
-/*
- * Utilities
- */
-pixman_bool_t
-_pixman_compute_composite_region32 (pixman_region32_t * region,
-				    pixman_image_t *    src_image,
-				    pixman_image_t *    mask_image,
-				    pixman_image_t *    dest_image,
-				    int32_t             src_x,
-				    int32_t             src_y,
-				    int32_t             mask_x,
-				    int32_t             mask_y,
-				    int32_t             dest_x,
-				    int32_t             dest_y,
-				    int32_t             width,
-				    int32_t             height);
-uint32_t *
-_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
-
-void
-_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info);
-
-/* These "formats" all have depth 0, so they
- * will never clash with any real ones
- */
-#define PIXMAN_null             PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
-#define PIXMAN_solid            PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
-#define PIXMAN_pixbuf		PIXMAN_FORMAT (0, 2, 0, 0, 0, 0)
-#define PIXMAN_rpixbuf		PIXMAN_FORMAT (0, 3, 0, 0, 0, 0)
-#define PIXMAN_unknown		PIXMAN_FORMAT (0, 4, 0, 0, 0, 0)
-#define PIXMAN_any		PIXMAN_FORMAT (0, 5, 0, 0, 0, 0)
-
-#define PIXMAN_OP_any		(PIXMAN_N_OPERATORS + 1)
-
-#define FAST_PATH_ID_TRANSFORM			(1 <<  0)
-#define FAST_PATH_NO_ALPHA_MAP			(1 <<  1)
-#define FAST_PATH_NO_CONVOLUTION_FILTER		(1 <<  2)
-#define FAST_PATH_NO_PAD_REPEAT			(1 <<  3)
-#define FAST_PATH_NO_REFLECT_REPEAT		(1 <<  4)
-#define FAST_PATH_NO_ACCESSORS			(1 <<  5)
-#define FAST_PATH_NARROW_FORMAT			(1 <<  6)
-#define FAST_PATH_COMPONENT_ALPHA		(1 <<  8)
-#define FAST_PATH_SAMPLES_OPAQUE		(1 <<  7)
-#define FAST_PATH_UNIFIED_ALPHA			(1 <<  9)
-#define FAST_PATH_SCALE_TRANSFORM		(1 << 10)
-#define FAST_PATH_NEAREST_FILTER		(1 << 11)
-#define FAST_PATH_HAS_TRANSFORM			(1 << 12)
-#define FAST_PATH_IS_OPAQUE			(1 << 13)
-#define FAST_PATH_NO_NORMAL_REPEAT		(1 << 14)
-#define FAST_PATH_NO_NONE_REPEAT		(1 << 15)
-#define FAST_PATH_X_UNIT_POSITIVE		(1 << 16)
-#define FAST_PATH_AFFINE_TRANSFORM		(1 << 17)
-#define FAST_PATH_Y_UNIT_ZERO			(1 << 18)
-#define FAST_PATH_BILINEAR_FILTER		(1 << 19)
-#define FAST_PATH_ROTATE_90_TRANSFORM		(1 << 20)
-#define FAST_PATH_ROTATE_180_TRANSFORM		(1 << 21)
-#define FAST_PATH_ROTATE_270_TRANSFORM		(1 << 22)
-#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	(1 << 23)
-#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR	(1 << 24)
-#define FAST_PATH_BITS_IMAGE			(1 << 25)
-#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER  (1 << 26)
-
-#define FAST_PATH_PAD_REPEAT						\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_NORMAL_REPEAT						\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_NONE_REPEAT						\
-    (FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_REFLECT_REPEAT					\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT)
-
-#define FAST_PATH_STANDARD_FLAGS					\
-    (FAST_PATH_NO_CONVOLUTION_FILTER	|				\
-     FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NARROW_FORMAT)
-
-#define FAST_PATH_STD_DEST_FLAGS					\
-    (FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NARROW_FORMAT)
-
-#define SOURCE_FLAGS(format)						\
-    (FAST_PATH_STANDARD_FLAGS |						\
-     ((PIXMAN_ ## format == PIXMAN_solid) ?				\
-      0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM)))
-
-#define MASK_FLAGS(format, extra)					\
-    ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
-
-#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
-    PIXMAN_OP_ ## op,							\
-    PIXMAN_ ## src,							\
-    src_flags,							        \
-    PIXMAN_ ## mask,						        \
-    mask_flags,							        \
-    PIXMAN_ ## dest,	                                                \
-    dest_flags,							        \
-    func
-
-#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func)			\
-    { FAST_PATH (							\
-	    op,								\
-	    src,  SOURCE_FLAGS (src),					\
-	    mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA),		\
-	    dest, FAST_PATH_STD_DEST_FLAGS,				\
-	    func) }
-
-#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func)		\
-    { FAST_PATH (							\
-	    op,								\
-	    src,  SOURCE_FLAGS (src),					\
-	    mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA),		\
-	    dest, FAST_PATH_STD_DEST_FLAGS,				\
-	    func) }
-
-extern pixman_implementation_t *global_implementation;
-
-static force_inline pixman_implementation_t *
-get_implementation (void)
-{
-#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
-    if (!global_implementation)
-	global_implementation = _pixman_choose_implementation ();
-#endif
-    return global_implementation;
-}
-
-/* This function is exported for the sake of the test suite and not part
- * of the ABI.
- */
-PIXMAN_EXPORT pixman_implementation_t *
-_pixman_internal_only_get_implementation (void);
-
-/* Memory allocation helpers */
-void *
-pixman_malloc_ab (unsigned int n, unsigned int b);
-
-void *
-pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
-
-void *
-pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c);
-
-pixman_bool_t
-_pixman_multiply_overflows_size (size_t a, size_t b);
-
-pixman_bool_t
-_pixman_multiply_overflows_int (unsigned int a, unsigned int b);
-
-pixman_bool_t
-_pixman_addition_overflows_int (unsigned int a, unsigned int b);
-
-/* Compositing utilities */
-void
-pixman_expand_to_float (argb_t               *dst,
-			const uint32_t       *src,
-			pixman_format_code_t  format,
-			int                   width);
-
-void
-pixman_contract_from_float (uint32_t     *dst,
-			    const argb_t *src,
-			    int           width);
-
-/* Region Helpers */
-pixman_bool_t
-pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-                                    pixman_region16_t *src);
-
-pixman_bool_t
-pixman_region16_copy_from_region32 (pixman_region16_t *dst,
-                                    pixman_region32_t *src);
-
-/* Doubly linked lists */
-typedef struct pixman_link_t pixman_link_t;
-struct pixman_link_t
-{
-    pixman_link_t *next;
-    pixman_link_t *prev;
-};
-
-typedef struct pixman_list_t pixman_list_t;
-struct pixman_list_t
-{
-    pixman_link_t *head;
-    pixman_link_t *tail;
-};
-
-static force_inline void
-pixman_list_init (pixman_list_t *list)
-{
-    list->head = (pixman_link_t *)list;
-    list->tail = (pixman_link_t *)list;
-}
-
-static force_inline void
-pixman_list_prepend (pixman_list_t *list, pixman_link_t *link)
-{
-    link->next = list->head;
-    link->prev = (pixman_link_t *)list;
-    list->head->prev = link;
-    list->head = link;
-}
-
-static force_inline void
-pixman_list_unlink (pixman_link_t *link)
-{
-    link->prev->next = link->next;
-    link->next->prev = link->prev;
-}
-
-static force_inline void
-pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
-{
-    pixman_list_unlink (link);
-    pixman_list_prepend (list, link);
-}
-
-/* Misc macros */
-
-#ifndef FALSE
-#   define FALSE 0
-#endif
-
-#ifndef TRUE
-#   define TRUE 1
-#endif
-
-#ifndef MIN
-#  define MIN(a, b) ((a < b) ? a : b)
-#endif
-
-#ifndef MAX
-#  define MAX(a, b) ((a > b) ? a : b)
-#endif
-
-/* Integer division that rounds towards -infinity */
-#define DIV(a, b)					   \
-    ((((a) < 0) == ((b) < 0)) ? (a) / (b) :                \
-     ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
-
-/* Modulus that produces the remainder wrt. DIV */
-#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
-
-#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
-
-#define FLOAT_IS_ZERO(f)     (-FLT_MIN < (f) && (f) < FLT_MIN)
-
-/* Conversion between 8888 and 0565 */
-
-static force_inline uint16_t
-convert_8888_to_0565 (uint32_t s)
-{
-    /* The following code can be compiled into just 4 instructions on ARM */
-    uint32_t a, b;
-    a = (s >> 3) & 0x1F001F;
-    b = s & 0xFC00;
-    a |= a >> 5;
-    a |= b >> 5;
-    return (uint16_t)a;
-}
-
-static force_inline uint32_t
-convert_0565_to_0888 (uint16_t s)
-{
-    return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |
-            ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |
-            ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)));
-}
-
-static force_inline uint32_t
-convert_0565_to_8888 (uint16_t s)
-{
-    return convert_0565_to_0888 (s) | 0xff000000;
-}
-
-/* Trivial versions that are useful in macros */
-
-static force_inline uint32_t
-convert_8888_to_8888 (uint32_t s)
-{
-    return s;
-}
-
-static force_inline uint32_t
-convert_x888_to_8888 (uint32_t s)
-{
-    return s | 0xff000000;
-}
-
-static force_inline uint16_t
-convert_0565_to_0565 (uint16_t s)
-{
-    return s;
-}
-
-#define PIXMAN_FORMAT_IS_WIDE(f)					\
-    (PIXMAN_FORMAT_A (f) > 8 ||						\
-     PIXMAN_FORMAT_R (f) > 8 ||						\
-     PIXMAN_FORMAT_G (f) > 8 ||						\
-     PIXMAN_FORMAT_B (f) > 8 ||						\
-     PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB)
-
-#ifdef WORDS_BIGENDIAN
-#   define SCREEN_SHIFT_LEFT(x,n)	((x) << (n))
-#   define SCREEN_SHIFT_RIGHT(x,n)	((x) >> (n))
-#else
-#   define SCREEN_SHIFT_LEFT(x,n)	((x) >> (n))
-#   define SCREEN_SHIFT_RIGHT(x,n)	((x) << (n))
-#endif
-
-static force_inline uint32_t
-unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
-{
-    uint32_t result;
-
-    if (from_bits == 0)
-	return 0;
-
-    /* Delete any extra bits */
-    val &= ((1 << from_bits) - 1);
-
-    if (from_bits >= to_bits)
-	return val >> (from_bits - to_bits);
-
-    /* Start out with the high bit of val in the high bit of result. */
-    result = val << (to_bits - from_bits);
-
-    /* Copy the bits in result, doubling the number of bits each time, until
-     * we fill all to_bits. Unrolled manually because from_bits and to_bits
-     * are usually known statically, so the compiler can turn all of this
-     * into a few shifts.
-     */
-#define REPLICATE()							\
-    do									\
-    {									\
-	if (from_bits < to_bits)					\
-	{								\
-	    result |= result >> from_bits;				\
-									\
-	    from_bits *= 2;						\
-	}								\
-    }									\
-    while (0)
-
-    REPLICATE();
-    REPLICATE();
-    REPLICATE();
-    REPLICATE();
-    REPLICATE();
-
-    return result;
-}
-
-uint16_t pixman_float_to_unorm (float f, int n_bits);
-float pixman_unorm_to_float (uint16_t u, int n_bits);
-
-/*
- * Various debugging code
- */
-
-#undef DEBUG
-
-#define COMPILE_TIME_ASSERT(x)						\
-    do { typedef int compile_time_assertion [(x)?1:-1]; } while (0)
-
-/* Turn on debugging depending on what type of release this is
- */
-#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
-
-/* Debugging gets turned on for development releases because these
- * are the things that end up in bleeding edge distributions such
- * as Rawhide etc.
- *
- * For performance reasons we don't turn it on for stable releases or
- * random git checkouts. (Random git checkouts are often used for
- * performance work).
- */
-
-#    define DEBUG
-
-#endif
-
-void
-_pixman_log_error (const char *function, const char *message);
-
-#define return_if_fail(expr)                                            \
-    do                                                                  \
-    {                                                                   \
-	if (unlikely (!(expr)))                                         \
-	{								\
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-	    return;							\
-	}								\
-    }                                                                   \
-    while (0)
-
-#define return_val_if_fail(expr, retval)                                \
-    do                                                                  \
-    {                                                                   \
-	if (unlikely (!(expr)))                                         \
-	{								\
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-	    return (retval);						\
-	}								\
-    }                                                                   \
-    while (0)
-
-#define critical_if_fail(expr)						\
-    do									\
-    {									\
-	if (unlikely (!(expr)))                                         \
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-    }									\
-    while (0)
-
-/*
- * Matrix
- */
-
-typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
-
-PIXMAN_EXPORT
-pixman_bool_t
-pixman_transform_point_31_16 (const pixman_transform_t    *t,
-                              const pixman_vector_48_16_t *v,
-                              pixman_vector_48_16_t       *result);
-
-PIXMAN_EXPORT
-void
-pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
-                                 const pixman_vector_48_16_t *v,
-                                 pixman_vector_48_16_t       *result);
-
-PIXMAN_EXPORT
-void
-pixman_transform_point_31_16_affine (const pixman_transform_t    *t,
-                                     const pixman_vector_48_16_t *v,
-                                     pixman_vector_48_16_t       *result);
-
-/*
- * Timers
- */
-
-#ifdef PIXMAN_TIMERS
-
-static inline uint64_t
-oil_profile_stamp_rdtsc (void)
-{
-    uint32_t hi, lo;
-
-    __asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi));
-
-    return lo | (((uint64_t)hi) << 32);
-}
-
-#define OIL_STAMP oil_profile_stamp_rdtsc
-
-typedef struct pixman_timer_t pixman_timer_t;
-
-struct pixman_timer_t
-{
-    int             initialized;
-    const char *    name;
-    uint64_t        n_times;
-    uint64_t        total;
-    pixman_timer_t *next;
-};
-
-extern int timer_defined;
-
-void pixman_timer_register (pixman_timer_t *timer);
-
-#define TIMER_BEGIN(tname)                                              \
-    {                                                                   \
-	static pixman_timer_t timer ## tname;                           \
-	uint64_t              begin ## tname;                           \
-        								\
-	if (!timer ## tname.initialized)				\
-	{                                                               \
-	    timer ## tname.initialized = 1;				\
-	    timer ## tname.name = # tname;				\
-	    pixman_timer_register (&timer ## tname);			\
-	}                                                               \
-									\
-	timer ## tname.n_times++;					\
-	begin ## tname = OIL_STAMP ();
-
-#define TIMER_END(tname)                                                \
-    timer ## tname.total += OIL_STAMP () - begin ## tname;		\
-    }
-
-#else
-
-#define TIMER_BEGIN(tname)
-#define TIMER_END(tname)
-
-#endif /* PIXMAN_TIMERS */
-
-#endif /* __ASSEMBLER__ */
-
-#endif /* PIXMAN_PRIVATE_H */
diff --git a/vendor/pixman/pixman/pixman-radial-gradient.c b/vendor/pixman/pixman/pixman-radial-gradient.c
deleted file mode 100644
index 38e1052f3..000000000
--- a/vendor/pixman/pixman/pixman-radial-gradient.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- *
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright © 2000 SuSE, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- * Copyright © 2007 Red Hat, Inc.
- *
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdlib.h>
-#include <math.h>
-#include "pixman-private.h"
-
-static inline pixman_fixed_32_32_t
-dot (pixman_fixed_48_16_t x1,
-     pixman_fixed_48_16_t y1,
-     pixman_fixed_48_16_t z1,
-     pixman_fixed_48_16_t x2,
-     pixman_fixed_48_16_t y2,
-     pixman_fixed_48_16_t z2)
-{
-    /*
-     * Exact computation, assuming that the input values can
-     * be represented as pixman_fixed_16_16_t
-     */
-    return x1 * x2 + y1 * y2 + z1 * z2;
-}
-
-static inline double
-fdot (double x1,
-      double y1,
-      double z1,
-      double x2,
-      double y2,
-      double z2)
-{
-    /*
-     * Error can be unbound in some special cases.
-     * Using clever dot product algorithms (for example compensated
-     * dot product) would improve this but make the code much less
-     * obvious
-     */
-    return x1 * x2 + y1 * y2 + z1 * z2;
-}
-
-static void
-radial_write_color (double                         a,
-		    double                         b,
-		    double                         c,
-		    double                         inva,
-		    double                         dr,
-		    double                         mindr,
-		    pixman_gradient_walker_t      *walker,
-		    pixman_repeat_t                repeat,
-		    int                            Bpp,
-		    pixman_gradient_walker_write_t write_pixel,
-		    uint32_t                      *buffer)
-{
-    /*
-     * In this function error propagation can lead to bad results:
-     *  - discr can have an unbound error (if b*b-a*c is very small),
-     *    potentially making it the opposite sign of what it should have been
-     *    (thus clearing a pixel that would have been colored or vice-versa)
-     *    or propagating the error to sqrtdiscr;
-     *    if discr has the wrong sign or b is very small, this can lead to bad
-     *    results
-     *
-     *  - the algorithm used to compute the solutions of the quadratic
-     *    equation is not numerically stable (but saves one division compared
-     *    to the numerically stable one);
-     *    this can be a problem if a*c is much smaller than b*b
-     *
-     *  - the above problems are worse if a is small (as inva becomes bigger)
-     */
-    double discr;
-
-    if (a == 0)
-    {
-	double t;
-
-	if (b == 0)
-	{
-	    memset (buffer, 0, Bpp);
-	    return;
-	}
-
-	t = pixman_fixed_1 / 2 * c / b;
-	if (repeat == PIXMAN_REPEAT_NONE)
-	{
-	    if (0 <= t && t <= pixman_fixed_1)
-	    {
-		write_pixel (walker, t, buffer);
-		return;
-	    }
-	}
-	else
-	{
-	    if (t * dr >= mindr)
-	    {
-		write_pixel (walker, t, buffer);
-		return;
-	    }
-	}
-
-	memset (buffer, 0, Bpp);
-	return;
-    }
-
-    discr = fdot (b, a, 0, b, -c, 0);
-    if (discr >= 0)
-    {
-	double sqrtdiscr, t0, t1;
-
-	sqrtdiscr = sqrt (discr);
-	t0 = (b + sqrtdiscr) * inva;
-	t1 = (b - sqrtdiscr) * inva;
-
-	/*
-	 * The root that must be used is the biggest one that belongs
-	 * to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any
-	 * solution that results in a positive radius otherwise).
-	 *
-	 * If a > 0, t0 is the biggest solution, so if it is valid, it
-	 * is the correct result.
-	 *
-	 * If a < 0, only one of the solutions can be valid, so the
-	 * order in which they are tested is not important.
-	 */
-	if (repeat == PIXMAN_REPEAT_NONE)
-	{
-	    if (0 <= t0 && t0 <= pixman_fixed_1)
-	    {
-		write_pixel (walker, t0, buffer);
-		return;
-	    }
-	    else if (0 <= t1 && t1 <= pixman_fixed_1)
-	    {
-		write_pixel (walker, t1, buffer);
-		return;
-           }
-	}
-	else
-	{
-	    if (t0 * dr >= mindr)
-	    {
-		write_pixel (walker, t0, buffer);
-		return;
-	    }
-	    else if (t1 * dr >= mindr)
-	    {
-		write_pixel (walker, t1, buffer);
-		return;
-	    }
-	}
-    }
-
-    memset (buffer, 0, Bpp);
-    return;
-}
-
-static uint32_t *
-radial_get_scanline (pixman_iter_t                 *iter,
-		     const uint32_t                *mask,
-		     int                            Bpp,
-		     pixman_gradient_walker_write_t write_pixel)
-{
-    /*
-     * Implementation of radial gradients following the PDF specification.
-     * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
-     * Manual (PDF 32000-1:2008 at the time of this writing).
-     *
-     * In the radial gradient problem we are given two circles (c₁,r₁) and
-     * (c₂,r₂) that define the gradient itself.
-     *
-     * Mathematically the gradient can be defined as the family of circles
-     *
-     *     ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
-     *
-     * excluding those circles whose radius would be < 0. When a point
-     * belongs to more than one circle, the one with a bigger t is the only
-     * one that contributes to its color. When a point does not belong
-     * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
-     * Further limitations on the range of values for t are imposed when
-     * the gradient is not repeated, namely t must belong to [0,1].
-     *
-     * The graphical result is the same as drawing the valid (radius > 0)
-     * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
-     * is not repeated) using SOURCE operator composition.
-     *
-     * It looks like a cone pointing towards the viewer if the ending circle
-     * is smaller than the starting one, a cone pointing inside the page if
-     * the starting circle is the smaller one and like a cylinder if they
-     * have the same radius.
-     *
-     * What we actually do is, given the point whose color we are interested
-     * in, compute the t values for that point, solving for t in:
-     *
-     *     length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
-     *
-     * Let's rewrite it in a simpler way, by defining some auxiliary
-     * variables:
-     *
-     *     cd = c₂ - c₁
-     *     pd = p - c₁
-     *     dr = r₂ - r₁
-     *     length(t·cd - pd) = r₁ + t·dr
-     *
-     * which actually means
-     *
-     *     hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
-     *
-     * or
-     *
-     *     ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
-     *
-     * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
-     *
-     *     (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
-     *
-     * where we can actually expand the squares and solve for t:
-     *
-     *     t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
-     *       = r₁² + 2·r₁·t·dr + t²·dr²
-     *
-     *     (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
-     *         (pdx² + pdy² - r₁²) = 0
-     *
-     *     A = cdx² + cdy² - dr²
-     *     B = pdx·cdx + pdy·cdy + r₁·dr
-     *     C = pdx² + pdy² - r₁²
-     *     At² - 2Bt + C = 0
-     *
-     * The solutions (unless the equation degenerates because of A = 0) are:
-     *
-     *     t = (B ± ⎷(B² - A·C)) / A
-     *
-     * The solution we are going to prefer is the bigger one, unless the
-     * radius associated to it is negative (or it falls outside the valid t
-     * range).
-     *
-     * Additional observations (useful for optimizations):
-     * A does not depend on p
-     *
-     * A < 0 <=> one of the two circles completely contains the other one
-     *   <=> for every p, the radiuses associated with the two t solutions
-     *       have opposite sign
-     */
-    pixman_image_t *image = iter->image;
-    int x = iter->x;
-    int y = iter->y;
-    int width = iter->width;
-    uint32_t *buffer = iter->buffer;
-
-    gradient_t *gradient = (gradient_t *)image;
-    radial_gradient_t *radial = (radial_gradient_t *)image;
-    uint32_t *end = buffer + width * (Bpp / 4);
-    pixman_gradient_walker_t walker;
-    pixman_vector_t v, unit;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return iter->buffer;
-
-	unit.vector[0] = image->common.transform->matrix[0][0];
-	unit.vector[1] = image->common.transform->matrix[1][0];
-	unit.vector[2] = image->common.transform->matrix[2][0];
-    }
-    else
-    {
-	unit.vector[0] = pixman_fixed_1;
-	unit.vector[1] = 0;
-	unit.vector[2] = 0;
-    }
-
-    if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
-    {
-	/*
-	 * Given:
-	 *
-	 * t = (B ± ⎷(B² - A·C)) / A
-	 *
-	 * where
-	 *
-	 * A = cdx² + cdy² - dr²
-	 * B = pdx·cdx + pdy·cdy + r₁·dr
-	 * C = pdx² + pdy² - r₁²
-	 * det = B² - A·C
-	 *
-	 * Since we have an affine transformation, we know that (pdx, pdy)
-	 * increase linearly with each pixel,
-	 *
-	 * pdx = pdx₀ + n·ux,
-	 * pdy = pdy₀ + n·uy,
-	 *
-	 * we can then express B, C and det through multiple differentiation.
-	 */
-	pixman_fixed_32_32_t b, db, c, dc, ddc;
-
-	/* warning: this computation may overflow */
-	v.vector[0] -= radial->c1.x;
-	v.vector[1] -= radial->c1.y;
-
-	/*
-	 * B and C are computed and updated exactly.
-	 * If fdot was used instead of dot, in the worst case it would
-	 * lose 11 bits of precision in each of the multiplication and
-	 * summing up would zero out all the bit that were preserved,
-	 * thus making the result 0 instead of the correct one.
-	 * This would mean a worst case of unbound relative error or
-	 * about 2^10 absolute error
-	 */
-	b = dot (v.vector[0], v.vector[1], radial->c1.radius,
-		 radial->delta.x, radial->delta.y, radial->delta.radius);
-	db = dot (unit.vector[0], unit.vector[1], 0,
-		  radial->delta.x, radial->delta.y, 0);
-
-	c = dot (v.vector[0], v.vector[1],
-		 -((pixman_fixed_48_16_t) radial->c1.radius),
-		 v.vector[0], v.vector[1], radial->c1.radius);
-	dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
-		  2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
-		  0,
-		  unit.vector[0], unit.vector[1], 0);
-	ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
-		       unit.vector[0], unit.vector[1], 0);
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		radial_write_color (radial->a, b, c,
-				    radial->inva,
-				    radial->delta.radius,
-				    radial->mindr,
-				    &walker,
-				    image->common.repeat,
-				    Bpp,
-				    write_pixel,
-				    buffer);
-	    }
-
-	    b += db;
-	    c += dc;
-	    dc += ddc;
-	    buffer += (Bpp / 4);
-	}
-    }
-    else
-    {
-	/* projective */
-	/* Warning:
-	 * error propagation guarantees are much looser than in the affine case
-	 */
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		if (v.vector[2] != 0)
-		{
-		    double pdx, pdy, invv2, b, c;
-
-		    invv2 = 1. * pixman_fixed_1 / v.vector[2];
-
-		    pdx = v.vector[0] * invv2 - radial->c1.x;
-		    /*    / pixman_fixed_1 */
-
-		    pdy = v.vector[1] * invv2 - radial->c1.y;
-		    /*    / pixman_fixed_1 */
-
-		    b = fdot (pdx, pdy, radial->c1.radius,
-			      radial->delta.x, radial->delta.y,
-			      radial->delta.radius);
-		    /*  / pixman_fixed_1 / pixman_fixed_1 */
-
-		    c = fdot (pdx, pdy, -radial->c1.radius,
-			      pdx, pdy, radial->c1.radius);
-		    /*  / pixman_fixed_1 / pixman_fixed_1 */
-
-		    radial_write_color (radial->a, b, c,
-					radial->inva,
-					radial->delta.radius,
-					radial->mindr,
-					&walker,
-					image->common.repeat,
-					Bpp,
-					write_pixel,
-					buffer);
-		}
-		else
-		{
-		    memset (buffer, 0, Bpp);
-		}
-	    }
-
-	    buffer += (Bpp / 4);
-
-	    v.vector[0] += unit.vector[0];
-	    v.vector[1] += unit.vector[1];
-	    v.vector[2] += unit.vector[2];
-	}
-    }
-
-    iter->y++;
-    return iter->buffer;
-}
-
-static uint32_t *
-radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return radial_get_scanline (iter, mask, 4,
-				_pixman_gradient_walker_write_narrow);
-}
-
-static uint32_t *
-radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return radial_get_scanline (iter, NULL, 16,
-				_pixman_gradient_walker_write_wide);
-}
-
-void
-_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
-{
-    if (iter->iter_flags & ITER_NARROW)
-	iter->get_scanline = radial_get_scanline_narrow;
-    else
-	iter->get_scanline = radial_get_scanline_wide;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_radial_gradient (const pixman_point_fixed_t *  inner,
-				     const pixman_point_fixed_t *  outer,
-				     pixman_fixed_t                inner_radius,
-				     pixman_fixed_t                outer_radius,
-				     const pixman_gradient_stop_t *stops,
-				     int                           n_stops)
-{
-    pixman_image_t *image;
-    radial_gradient_t *radial;
-
-    image = _pixman_image_allocate ();
-
-    if (!image)
-	return NULL;
-
-    radial = &image->radial;
-
-    if (!_pixman_init_gradient (&radial->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    image->type = RADIAL;
-
-    radial->c1.x = inner->x;
-    radial->c1.y = inner->y;
-    radial->c1.radius = inner_radius;
-    radial->c2.x = outer->x;
-    radial->c2.y = outer->y;
-    radial->c2.radius = outer_radius;
-
-    /* warning: this computations may overflow */
-    radial->delta.x = radial->c2.x - radial->c1.x;
-    radial->delta.y = radial->c2.y - radial->c1.y;
-    radial->delta.radius = radial->c2.radius - radial->c1.radius;
-
-    /* computed exactly, then cast to double -> every bit of the double
-       representation is correct (53 bits) */
-    radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius,
-		     radial->delta.x, radial->delta.y, radial->delta.radius);
-    if (radial->a != 0)
-	radial->inva = 1. * pixman_fixed_1 / radial->a;
-
-    radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
-
-    return image;
-}
diff --git a/vendor/pixman/pixman/pixman-region.c b/vendor/pixman/pixman/pixman-region.c
deleted file mode 100644
index 537d5fbe4..000000000
--- a/vendor/pixman/pixman/pixman-region.c
+++ /dev/null
@@ -1,2800 +0,0 @@
-/*
- * Copyright 1987, 1988, 1989, 1998  The Open Group
- * 
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation.
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- * Except as contained in this notice, the name of The Open Group shall not be
- * used in advertising or otherwise to promote the sale, use or other dealings
- * in this Software without prior written authorization from The Open Group.
- * 
- * Copyright 1987, 1988, 1989 by
- * Digital Equipment Corporation, Maynard, Massachusetts.
- * 
- *                    All Rights Reserved
- * 
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose and without fee is hereby granted,
- * provided that the above copyright notice appear in all copies and that
- * both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of Digital not be
- * used in advertising or publicity pertaining to distribution of the
- * software without specific, written prior permission.
- * 
- * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
- * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
- * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
- * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
- * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
- * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Copyright © 1998 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-#include <stdio.h>
-#include "pixman-private.h"
-
-#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects)
-/* not a region */
-#define PIXREGION_NAR(reg)      ((reg)->data == pixman_broken_data)
-#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1)
-#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0)
-#define PIXREGION_RECTS(reg) \
-    ((reg)->data ? (box_type_t *)((reg)->data + 1) \
-     : (box_type_t *)&(reg)->extents)
-#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1))
-#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i])
-#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects)
-#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1)
-
-#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2)
-#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2)
-
-#ifdef DEBUG
-
-#define GOOD(reg)							\
-    do									\
-    {									\
-	if (!PREFIX (_selfcheck (reg)))					\
-	    _pixman_log_error (FUNC, "Malformed region " # reg);	\
-    } while (0)
-
-#else
-
-#define GOOD(reg)
-
-#endif
-
-static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 };
-static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 };
-#if defined (__llvm__) && !defined (__clang__)
-static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
-#else
-static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
-#endif
-
-static box_type_t *pixman_region_empty_box =
-    (box_type_t *)&PREFIX (_empty_box_);
-static region_data_type_t *pixman_region_empty_data =
-    (region_data_type_t *)&PREFIX (_empty_data_);
-static region_data_type_t *pixman_broken_data =
-    (region_data_type_t *)&PREFIX (_broken_data_);
-
-static pixman_bool_t
-pixman_break (region_type_t *region);
-
-/*
- * The functions in this file implement the Region abstraction used extensively
- * throughout the X11 sample server. A Region is simply a set of disjoint
- * (non-overlapping) rectangles, plus an "extent" rectangle which is the
- * smallest single rectangle that contains all the non-overlapping rectangles.
- *
- * A Region is implemented as a "y-x-banded" array of rectangles.  This array
- * imposes two degrees of order.  First, all rectangles are sorted by top side
- * y coordinate first (y1), and then by left side x coordinate (x1).
- *
- * Furthermore, the rectangles are grouped into "bands".  Each rectangle in a
- * band has the same top y coordinate (y1), and each has the same bottom y
- * coordinate (y2).  Thus all rectangles in a band differ only in their left
- * and right side (x1 and x2).  Bands are implicit in the array of rectangles:
- * there is no separate list of band start pointers.
- *
- * The y-x band representation does not minimize rectangles.  In particular,
- * if a rectangle vertically crosses a band (the rectangle has scanlines in
- * the y1 to y2 area spanned by the band), then the rectangle may be broken
- * down into two or more smaller rectangles stacked one atop the other.
- *
- *  -----------				    -----------
- *  |         |				    |         |		    band 0
- *  |         |  --------		    -----------  --------
- *  |         |  |      |  in y-x banded    |         |  |      |   band 1
- *  |         |  |      |  form is	    |         |  |      |
- *  -----------  |      |		    -----------  --------
- *               |      |				 |      |   band 2
- *               --------				 --------
- *
- * An added constraint on the rectangles is that they must cover as much
- * horizontal area as possible: no two rectangles within a band are allowed
- * to touch.
- *
- * Whenever possible, bands will be merged together to cover a greater vertical
- * distance (and thus reduce the number of rectangles). Two bands can be merged
- * only if the bottom of one touches the top of the other and they have
- * rectangles in the same places (of the same width, of course).
- *
- * Adam de Boor wrote most of the original region code.  Joel McCormack
- * substantially modified or rewrote most of the core arithmetic routines, and
- * added pixman_region_validate in order to support several speed improvements
- * to pixman_region_validate_tree.  Bob Scheifler changed the representation
- * to be more compact when empty or a single rectangle, and did a bunch of
- * gratuitous reformatting. Carl Worth did further gratuitous reformatting
- * while re-merging the server and client region code into libpixregion.
- * Soren Sandmann did even more gratuitous reformatting.
- */
-
-/*  true iff two Boxes overlap */
-#define EXTENTCHECK(r1, r2)	   \
-    (!( ((r1)->x2 <= (r2)->x1)  || \
-        ((r1)->x1 >= (r2)->x2)  || \
-        ((r1)->y2 <= (r2)->y1)  || \
-        ((r1)->y1 >= (r2)->y2) ) )
-
-/* true iff (x,y) is in Box */
-#define INBOX(r, x, y)	\
-    ( ((r)->x2 >  x) && \
-      ((r)->x1 <= x) && \
-      ((r)->y2 >  y) && \
-      ((r)->y1 <= y) )
-
-/* true iff Box r1 contains Box r2 */
-#define SUBSUMES(r1, r2)	\
-    ( ((r1)->x1 <= (r2)->x1) && \
-      ((r1)->x2 >= (r2)->x2) && \
-      ((r1)->y1 <= (r2)->y1) && \
-      ((r1)->y2 >= (r2)->y2) )
-
-static size_t
-PIXREGION_SZOF (size_t n)
-{
-    size_t size = n * sizeof(box_type_t);
-    
-    if (n > UINT32_MAX / sizeof(box_type_t))
-	return 0;
-
-    if (sizeof(region_data_type_t) > UINT32_MAX - size)
-	return 0;
-
-    return size + sizeof(region_data_type_t);
-}
-
-static region_data_type_t *
-alloc_data (size_t n)
-{
-    size_t sz = PIXREGION_SZOF (n);
-
-    if (!sz)
-	return NULL;
-
-    return malloc (sz);
-}
-
-#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data)
-
-#define RECTALLOC_BAIL(region, n, bail)					\
-    do									\
-    {									\
-	if (!(region)->data ||						\
-	    (((region)->data->numRects + (n)) > (region)->data->size))	\
-	{								\
-	    if (!pixman_rect_alloc (region, n))				\
-		goto bail;						\
-	}								\
-    } while (0)
-
-#define RECTALLOC(region, n)						\
-    do									\
-    {									\
-	if (!(region)->data ||						\
-	    (((region)->data->numRects + (n)) > (region)->data->size))	\
-	{								\
-	    if (!pixman_rect_alloc (region, n)) {			\
-		return FALSE;						\
-	    }								\
-	}								\
-    } while (0)
-
-#define ADDRECT(next_rect, nx1, ny1, nx2, ny2)      \
-    do						    \
-    {						    \
-	next_rect->x1 = nx1;                        \
-	next_rect->y1 = ny1;                        \
-	next_rect->x2 = nx2;                        \
-	next_rect->y2 = ny2;                        \
-	next_rect++;                                \
-    }						    \
-    while (0)
-
-#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2)			\
-    do									\
-    {									\
-	if (!(region)->data ||						\
-	    ((region)->data->numRects == (region)->data->size))		\
-	{								\
-	    if (!pixman_rect_alloc (region, 1))				\
-		return FALSE;						\
-	    next_rect = PIXREGION_TOP (region);				\
-	}								\
-	ADDRECT (next_rect, nx1, ny1, nx2, ny2);			\
-	region->data->numRects++;					\
-	critical_if_fail (region->data->numRects <= region->data->size);		\
-    } while (0)
-
-#define DOWNSIZE(reg, numRects)						\
-    do									\
-    {									\
-	if (((numRects) < ((reg)->data->size >> 1)) &&			\
-	    ((reg)->data->size > 50))					\
-	{								\
-	    region_data_type_t * new_data;				\
-	    size_t data_size = PIXREGION_SZOF (numRects);		\
-									\
-	    if (!data_size)						\
-	    {								\
-		new_data = NULL;					\
-	    }								\
-	    else							\
-	    {								\
-		new_data = (region_data_type_t *)			\
-		    realloc ((reg)->data, data_size);			\
-	    }								\
-									\
-	    if (new_data)						\
-	    {								\
-		new_data->size = (numRects);				\
-		(reg)->data = new_data;					\
-	    }								\
-	}								\
-    } while (0)
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_equal) (const region_type_t *reg1, const region_type_t *reg2)
-{
-    int i;
-    box_type_t *rects1;
-    box_type_t *rects2;
-
-    if (reg1->extents.x1 != reg2->extents.x1)
-	return FALSE;
-    
-    if (reg1->extents.x2 != reg2->extents.x2)
-	return FALSE;
-    
-    if (reg1->extents.y1 != reg2->extents.y1)
-	return FALSE;
-    
-    if (reg1->extents.y2 != reg2->extents.y2)
-	return FALSE;
-    
-    if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2))
-	return FALSE;
-
-    rects1 = PIXREGION_RECTS (reg1);
-    rects2 = PIXREGION_RECTS (reg2);
-    
-    for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++)
-    {
-	if (rects1[i].x1 != rects2[i].x1)
-	    return FALSE;
-	
-	if (rects1[i].x2 != rects2[i].x2)
-	    return FALSE;
-	
-	if (rects1[i].y1 != rects2[i].y1)
-	    return FALSE;
-	
-	if (rects1[i].y2 != rects2[i].y2)
-	    return FALSE;
-    }
-
-    return TRUE;
-}
-
-int
-PREFIX (_print) (region_type_t *rgn)
-{
-    int num, size;
-    int i;
-    box_type_t * rects;
-
-    num = PIXREGION_NUMRECTS (rgn);
-    size = PIXREGION_SIZE (rgn);
-    rects = PIXREGION_RECTS (rgn);
-
-    fprintf (stderr, "num: %d size: %d\n", num, size);
-    fprintf (stderr, "extents: %d %d %d %d\n",
-             rgn->extents.x1,
-	     rgn->extents.y1,
-	     rgn->extents.x2,
-	     rgn->extents.y2);
-    
-    for (i = 0; i < num; i++)
-    {
-	fprintf (stderr, "%d %d %d %d \n",
-	         rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
-    }
-    
-    fprintf (stderr, "\n");
-
-    return(num);
-}
-
-
-PIXMAN_EXPORT void
-PREFIX (_init) (region_type_t *region)
-{
-    region->extents = *pixman_region_empty_box;
-    region->data = pixman_region_empty_data;
-}
-
-PIXMAN_EXPORT void
-PREFIX (_init_rect) (region_type_t *	region,
-                     int		x,
-		     int		y,
-		     unsigned int	width,
-		     unsigned int	height)
-{
-    region->extents.x1 = x;
-    region->extents.y1 = y;
-    region->extents.x2 = x + width;
-    region->extents.y2 = y + height;
-
-    if (!GOOD_RECT (&region->extents))
-    {
-        if (BAD_RECT (&region->extents))
-            _pixman_log_error (FUNC, "Invalid rectangle passed");
-        PREFIX (_init) (region);
-        return;
-    }
-
-    region->data = NULL;
-}
-
-PIXMAN_EXPORT void
-PREFIX (_init_with_extents) (region_type_t *region, const box_type_t *extents)
-{
-    if (!GOOD_RECT (extents))
-    {
-        if (BAD_RECT (extents))
-            _pixman_log_error (FUNC, "Invalid rectangle passed");
-        PREFIX (_init) (region);
-        return;
-    }
-    region->extents = *extents;
-
-    region->data = NULL;
-}
-
-PIXMAN_EXPORT void
-PREFIX (_fini) (region_type_t *region)
-{
-    GOOD (region);
-    FREE_DATA (region);
-}
-
-PIXMAN_EXPORT int
-PREFIX (_n_rects) (const region_type_t *region)
-{
-    return PIXREGION_NUMRECTS (region);
-}
-
-PIXMAN_EXPORT box_type_t *
-PREFIX (_rectangles) (const region_type_t *region,
-                      int               *n_rects)
-{
-    if (n_rects)
-	*n_rects = PIXREGION_NUMRECTS (region);
-
-    return PIXREGION_RECTS (region);
-}
-
-static pixman_bool_t
-pixman_break (region_type_t *region)
-{
-    FREE_DATA (region);
-
-    region->extents = *pixman_region_empty_box;
-    region->data = pixman_broken_data;
-
-    return FALSE;
-}
-
-static pixman_bool_t
-pixman_rect_alloc (region_type_t * region,
-                   int             n)
-{
-    region_data_type_t *data;
-
-    if (!region->data)
-    {
-	n++;
-	region->data = alloc_data (n);
-
-	if (!region->data)
-	    return pixman_break (region);
-
-	region->data->numRects = 1;
-	*PIXREGION_BOXPTR (region) = region->extents;
-    }
-    else if (!region->data->size)
-    {
-	region->data = alloc_data (n);
-
-	if (!region->data)
-	    return pixman_break (region);
-
-	region->data->numRects = 0;
-    }
-    else
-    {
-	size_t data_size;
-
-	if (n == 1)
-	{
-	    n = region->data->numRects;
-	    if (n > 500) /* XXX pick numbers out of a hat */
-		n = 250;
-	}
-
-	n += region->data->numRects;
-	data_size = PIXREGION_SZOF (n);
-
-	if (!data_size)
-	{
-	    data = NULL;
-	}
-	else
-	{
-	    data = (region_data_type_t *)
-		realloc (region->data, PIXREGION_SZOF (n));
-	}
-	
-	if (!data)
-	    return pixman_break (region);
-	
-	region->data = data;
-    }
-    
-    region->data->size = n;
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_copy) (region_type_t *dst, const region_type_t *src)
-{
-    GOOD (dst);
-    GOOD (src);
-
-    if (dst == src)
-	return TRUE;
-    
-    dst->extents = src->extents;
-
-    if (!src->data || !src->data->size)
-    {
-	FREE_DATA (dst);
-	dst->data = src->data;
-	return TRUE;
-    }
-    
-    if (!dst->data || (dst->data->size < src->data->numRects))
-    {
-	FREE_DATA (dst);
-
-	dst->data = alloc_data (src->data->numRects);
-
-	if (!dst->data)
-	    return pixman_break (dst);
-
-	dst->data->size = src->data->numRects;
-    }
-
-    dst->data->numRects = src->data->numRects;
-
-    memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src),
-             dst->data->numRects * sizeof(box_type_t));
-
-    return TRUE;
-}
-
-/*======================================================================
- *	    Generic Region Operator
- *====================================================================*/
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_coalesce --
- *	Attempt to merge the boxes in the current band with those in the
- *	previous one.  We are guaranteed that the current band extends to
- *      the end of the rects array.  Used only by pixman_op.
- *
- * Results:
- *	The new index for the previous band.
- *
- * Side Effects:
- *	If coalescing takes place:
- *	    - rectangles in the previous band will have their y2 fields
- *	      altered.
- *	    - region->data->numRects will be decreased.
- *
- *-----------------------------------------------------------------------
- */
-static inline int
-pixman_coalesce (region_type_t * region,      /* Region to coalesce		 */
-		 int             prev_start,  /* Index of start of previous band */
-		 int             cur_start)   /* Index of start of current band  */
-{
-    box_type_t *prev_box;       /* Current box in previous band	     */
-    box_type_t *cur_box;        /* Current box in current band       */
-    int numRects;               /* Number rectangles in both bands   */
-    int y2;                     /* Bottom of current band	     */
-
-    /*
-     * Figure out how many rectangles are in the band.
-     */
-    numRects = cur_start - prev_start;
-    critical_if_fail (numRects == region->data->numRects - cur_start);
-
-    if (!numRects) return cur_start;
-
-    /*
-     * The bands may only be coalesced if the bottom of the previous
-     * matches the top scanline of the current.
-     */
-    prev_box = PIXREGION_BOX (region, prev_start);
-    cur_box = PIXREGION_BOX (region, cur_start);
-    if (prev_box->y2 != cur_box->y1) return cur_start;
-
-    /*
-     * Make sure the bands have boxes in the same places. This
-     * assumes that boxes have been added in such a way that they
-     * cover the most area possible. I.e. two boxes in a band must
-     * have some horizontal space between them.
-     */
-    y2 = cur_box->y2;
-
-    do
-    {
-	if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2))
-	    return (cur_start);
-	
-	prev_box++;
-	cur_box++;
-	numRects--;
-    }
-    while (numRects);
-
-    /*
-     * The bands may be merged, so set the bottom y of each box
-     * in the previous band to the bottom y of the current band.
-     */
-    numRects = cur_start - prev_start;
-    region->data->numRects -= numRects;
-
-    do
-    {
-	prev_box--;
-	prev_box->y2 = y2;
-	numRects--;
-    }
-    while (numRects);
-
-    return prev_start;
-}
-
-/* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */
-
-#define COALESCE(new_reg, prev_band, cur_band)                          \
-    do									\
-    {									\
-	if (cur_band - prev_band == new_reg->data->numRects - cur_band)	\
-	    prev_band = pixman_coalesce (new_reg, prev_band, cur_band);	\
-	else								\
-	    prev_band = cur_band;					\
-    } while (0)
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_append_non_o --
- *	Handle a non-overlapping band for the union and subtract operations.
- *      Just adds the (top/bottom-clipped) rectangles into the region.
- *      Doesn't have to check for subsumption or anything.
- *
- * Results:
- *	None.
- *
- * Side Effects:
- *	region->data->numRects is incremented and the rectangles overwritten
- *	with the rectangles we're passed.
- *
- *-----------------------------------------------------------------------
- */
-static inline pixman_bool_t
-pixman_region_append_non_o (region_type_t * region,
-			    box_type_t *    r,
-			    box_type_t *    r_end,
-			    int             y1,
-			    int             y2)
-{
-    box_type_t *next_rect;
-    int new_rects;
-
-    new_rects = r_end - r;
-
-    critical_if_fail (y1 < y2);
-    critical_if_fail (new_rects != 0);
-
-    /* Make sure we have enough space for all rectangles to be added */
-    RECTALLOC (region, new_rects);
-    next_rect = PIXREGION_TOP (region);
-    region->data->numRects += new_rects;
-
-    do
-    {
-	critical_if_fail (r->x1 < r->x2);
-	ADDRECT (next_rect, r->x1, y1, r->x2, y2);
-	r++;
-    }
-    while (r != r_end);
-
-    return TRUE;
-}
-
-#define FIND_BAND(r, r_band_end, r_end, ry1)			     \
-    do								     \
-    {								     \
-	ry1 = r->y1;						     \
-	r_band_end = r + 1;					     \
-	while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) {   \
-	    r_band_end++;					     \
-	}							     \
-    } while (0)
-
-#define APPEND_REGIONS(new_reg, r, r_end)				\
-    do									\
-    {									\
-	int new_rects;							\
-	if ((new_rects = r_end - r)) {					\
-	    RECTALLOC_BAIL (new_reg, new_rects, bail);			\
-	    memmove ((char *)PIXREGION_TOP (new_reg), (char *)r,	\
-		     new_rects * sizeof(box_type_t));			\
-	    new_reg->data->numRects += new_rects;			\
-	}								\
-    } while (0)
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_op --
- *	Apply an operation to two regions. Called by pixman_region_union, pixman_region_inverse,
- *	pixman_region_subtract, pixman_region_intersect....  Both regions MUST have at least one
- *      rectangle, and cannot be the same object.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *	The new region is overwritten.
- *	overlap set to TRUE if overlap_func ever returns TRUE.
- *
- * Notes:
- *	The idea behind this function is to view the two regions as sets.
- *	Together they cover a rectangle of area that this function divides
- *	into horizontal bands where points are covered only by one region
- *	or by both. For the first case, the non_overlap_func is called with
- *	each the band and the band's upper and lower extents. For the
- *	second, the overlap_func is called to process the entire band. It
- *	is responsible for clipping the rectangles in the band, though
- *	this function provides the boundaries.
- *	At the end of each band, the new region is coalesced, if possible,
- *	to reduce the number of rectangles in the region.
- *
- *-----------------------------------------------------------------------
- */
-
-typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region,
-					   box_type_t *   r1,
-					   box_type_t *   r1_end,
-					   box_type_t *   r2,
-					   box_type_t *   r2_end,
-					   int            y1,
-					   int            y2);
-
-static pixman_bool_t
-pixman_op (region_type_t *  new_reg,               /* Place to store result	    */
-	   const region_type_t *  reg1,                  /* First region in operation     */
-	   const region_type_t *  reg2,                  /* 2d region in operation        */
-	   overlap_proc_ptr overlap_func,          /* Function to call for over-
-						    * lapping bands		    */
-	   int              append_non1,           /* Append non-overlapping bands  
-						    * in region 1 ?
-						    */
-	   int              append_non2            /* Append non-overlapping bands
-						    * in region 2 ?
-						    */
-    )
-{
-    box_type_t *r1;                 /* Pointer into first region     */
-    box_type_t *r2;                 /* Pointer into 2d region	     */
-    box_type_t *r1_end;             /* End of 1st region	     */
-    box_type_t *r2_end;             /* End of 2d region		     */
-    int ybot;                       /* Bottom of intersection	     */
-    int ytop;                       /* Top of intersection	     */
-    region_data_type_t *old_data;   /* Old data for new_reg	     */
-    int prev_band;                  /* Index of start of
-				     * previous band in new_reg       */
-    int cur_band;                   /* Index of start of current
-				     * band in new_reg		     */
-    box_type_t * r1_band_end;       /* End of current band in r1     */
-    box_type_t * r2_band_end;       /* End of current band in r2     */
-    int top;                        /* Top of non-overlapping band   */
-    int bot;                        /* Bottom of non-overlapping band*/
-    int r1y1;                       /* Temps for r1->y1 and r2->y1   */
-    int r2y1;
-    int new_size;
-    int numRects;
-
-    /*
-     * Break any region computed from a broken region
-     */
-    if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
-	return pixman_break (new_reg);
-
-    /*
-     * Initialization:
-     *	set r1, r2, r1_end and r2_end appropriately, save the rectangles
-     * of the destination region until the end in case it's one of
-     * the two source regions, then mark the "new" region empty, allocating
-     * another array of rectangles for it to use.
-     */
-
-    r1 = PIXREGION_RECTS (reg1);
-    new_size = PIXREGION_NUMRECTS (reg1);
-    r1_end = r1 + new_size;
-
-    numRects = PIXREGION_NUMRECTS (reg2);
-    r2 = PIXREGION_RECTS (reg2);
-    r2_end = r2 + numRects;
-    
-    critical_if_fail (r1 != r1_end);
-    critical_if_fail (r2 != r2_end);
-
-    old_data = (region_data_type_t *)NULL;
-
-    if (((new_reg == reg1) && (new_size > 1)) ||
-        ((new_reg == reg2) && (numRects > 1)))
-    {
-        old_data = new_reg->data;
-        new_reg->data = pixman_region_empty_data;
-    }
-
-    /* guess at new size */
-    if (numRects > new_size)
-	new_size = numRects;
-
-    new_size <<= 1;
-
-    if (!new_reg->data)
-	new_reg->data = pixman_region_empty_data;
-    else if (new_reg->data->size)
-	new_reg->data->numRects = 0;
-
-    if (new_size > new_reg->data->size)
-    {
-        if (!pixman_rect_alloc (new_reg, new_size))
-        {
-            free (old_data);
-            return FALSE;
-	}
-    }
-
-    /*
-     * Initialize ybot.
-     * In the upcoming loop, ybot and ytop serve different functions depending
-     * on whether the band being handled is an overlapping or non-overlapping
-     * band.
-     *  In the case of a non-overlapping band (only one of the regions
-     * has points in the band), ybot is the bottom of the most recent
-     * intersection and thus clips the top of the rectangles in that band.
-     * ytop is the top of the next intersection between the two regions and
-     * serves to clip the bottom of the rectangles in the current band.
-     *	For an overlapping band (where the two regions intersect), ytop clips
-     * the top of the rectangles of both regions and ybot clips the bottoms.
-     */
-
-    ybot = MIN (r1->y1, r2->y1);
-
-    /*
-     * prev_band serves to mark the start of the previous band so rectangles
-     * can be coalesced into larger rectangles. qv. pixman_coalesce, above.
-     * In the beginning, there is no previous band, so prev_band == cur_band
-     * (cur_band is set later on, of course, but the first band will always
-     * start at index 0). prev_band and cur_band must be indices because of
-     * the possible expansion, and resultant moving, of the new region's
-     * array of rectangles.
-     */
-    prev_band = 0;
-
-    do
-    {
-        /*
-	 * This algorithm proceeds one source-band (as opposed to a
-	 * destination band, which is determined by where the two regions
-	 * intersect) at a time. r1_band_end and r2_band_end serve to mark the
-	 * rectangle after the last one in the current band for their
-	 * respective regions.
-	 */
-        critical_if_fail (r1 != r1_end);
-        critical_if_fail (r2 != r2_end);
-
-        FIND_BAND (r1, r1_band_end, r1_end, r1y1);
-        FIND_BAND (r2, r2_band_end, r2_end, r2y1);
-
-        /*
-	 * First handle the band that doesn't intersect, if any.
-	 *
-	 * Note that attention is restricted to one band in the
-	 * non-intersecting region at once, so if a region has n
-	 * bands between the current position and the next place it overlaps
-	 * the other, this entire loop will be passed through n times.
-	 */
-        if (r1y1 < r2y1)
-        {
-            if (append_non1)
-            {
-                top = MAX (r1y1, ybot);
-                bot = MIN (r1->y2, r2y1);
-                if (top != bot)
-                {
-                    cur_band = new_reg->data->numRects;
-                    if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot))
-			goto bail;
-                    COALESCE (new_reg, prev_band, cur_band);
-		}
-	    }
-            ytop = r2y1;
-	}
-        else if (r2y1 < r1y1)
-        {
-            if (append_non2)
-            {
-                top = MAX (r2y1, ybot);
-                bot = MIN (r2->y2, r1y1);
-		
-                if (top != bot)
-                {
-                    cur_band = new_reg->data->numRects;
-
-                    if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot))
-			goto bail;
-
-                    COALESCE (new_reg, prev_band, cur_band);
-		}
-	    }
-            ytop = r1y1;
-	}
-        else
-        {
-            ytop = r1y1;
-	}
-
-        /*
-	 * Now see if we've hit an intersecting band. The two bands only
-	 * intersect if ybot > ytop
-	 */
-        ybot = MIN (r1->y2, r2->y2);
-        if (ybot > ytop)
-        {
-            cur_band = new_reg->data->numRects;
-
-            if (!(*overlap_func)(new_reg,
-                                 r1, r1_band_end,
-                                 r2, r2_band_end,
-                                 ytop, ybot))
-	    {
-		goto bail;
-	    }
-	    
-            COALESCE (new_reg, prev_band, cur_band);
-	}
-
-        /*
-	 * If we've finished with a band (y2 == ybot) we skip forward
-	 * in the region to the next band.
-	 */
-        if (r1->y2 == ybot)
-	    r1 = r1_band_end;
-
-        if (r2->y2 == ybot)
-	    r2 = r2_band_end;
-
-    }
-    while (r1 != r1_end && r2 != r2_end);
-
-    /*
-     * Deal with whichever region (if any) still has rectangles left.
-     *
-     * We only need to worry about banding and coalescing for the very first
-     * band left.  After that, we can just group all remaining boxes,
-     * regardless of how many bands, into one final append to the list.
-     */
-
-    if ((r1 != r1_end) && append_non1)
-    {
-        /* Do first non_overlap1Func call, which may be able to coalesce */
-        FIND_BAND (r1, r1_band_end, r1_end, r1y1);
-	
-        cur_band = new_reg->data->numRects;
-	
-        if (!pixman_region_append_non_o (new_reg,
-                                         r1, r1_band_end,
-                                         MAX (r1y1, ybot), r1->y2))
-	{
-	    goto bail;
-	}
-	
-        COALESCE (new_reg, prev_band, cur_band);
-
-        /* Just append the rest of the boxes  */
-        APPEND_REGIONS (new_reg, r1_band_end, r1_end);
-    }
-    else if ((r2 != r2_end) && append_non2)
-    {
-        /* Do first non_overlap2Func call, which may be able to coalesce */
-        FIND_BAND (r2, r2_band_end, r2_end, r2y1);
-
-	cur_band = new_reg->data->numRects;
-
-        if (!pixman_region_append_non_o (new_reg,
-                                         r2, r2_band_end,
-                                         MAX (r2y1, ybot), r2->y2))
-	{
-	    goto bail;
-	}
-
-        COALESCE (new_reg, prev_band, cur_band);
-
-        /* Append rest of boxes */
-        APPEND_REGIONS (new_reg, r2_band_end, r2_end);
-    }
-
-    free (old_data);
-
-    if (!(numRects = new_reg->data->numRects))
-    {
-        FREE_DATA (new_reg);
-        new_reg->data = pixman_region_empty_data;
-    }
-    else if (numRects == 1)
-    {
-        new_reg->extents = *PIXREGION_BOXPTR (new_reg);
-        FREE_DATA (new_reg);
-        new_reg->data = (region_data_type_t *)NULL;
-    }
-    else
-    {
-        DOWNSIZE (new_reg, numRects);
-    }
-
-    return TRUE;
-
-bail:
-    free (old_data);
-
-    return pixman_break (new_reg);
-}
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_set_extents --
- *	Reset the extents of a region to what they should be. Called by
- *	pixman_region_subtract and pixman_region_intersect as they can't
- *      figure it out along the way or do so easily, as pixman_region_union can.
- *
- * Results:
- *	None.
- *
- * Side Effects:
- *	The region's 'extents' structure is overwritten.
- *
- *-----------------------------------------------------------------------
- */
-static void
-pixman_set_extents (region_type_t *region)
-{
-    box_type_t *box, *box_end;
-
-    if (!region->data)
-	return;
-
-    if (!region->data->size)
-    {
-        region->extents.x2 = region->extents.x1;
-        region->extents.y2 = region->extents.y1;
-        return;
-    }
-
-    box = PIXREGION_BOXPTR (region);
-    box_end = PIXREGION_END (region);
-
-    /*
-     * Since box is the first rectangle in the region, it must have the
-     * smallest y1 and since box_end is the last rectangle in the region,
-     * it must have the largest y2, because of banding. Initialize x1 and
-     * x2 from  box and box_end, resp., as good things to initialize them
-     * to...
-     */
-    region->extents.x1 = box->x1;
-    region->extents.y1 = box->y1;
-    region->extents.x2 = box_end->x2;
-    region->extents.y2 = box_end->y2;
-
-    critical_if_fail (region->extents.y1 < region->extents.y2);
-
-    while (box <= box_end)
-    {
-        if (box->x1 < region->extents.x1)
-	    region->extents.x1 = box->x1;
-        if (box->x2 > region->extents.x2)
-	    region->extents.x2 = box->x2;
-        box++;
-    }
-
-    critical_if_fail (region->extents.x1 < region->extents.x2);
-}
-
-/*======================================================================
- *	    Region Intersection
- *====================================================================*/
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_intersect_o --
- *	Handle an overlapping band for pixman_region_intersect.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *	Rectangles may be added to the region.
- *
- *-----------------------------------------------------------------------
- */
-/*ARGSUSED*/
-static pixman_bool_t
-pixman_region_intersect_o (region_type_t *region,
-                           box_type_t *   r1,
-                           box_type_t *   r1_end,
-                           box_type_t *   r2,
-                           box_type_t *   r2_end,
-                           int            y1,
-                           int            y2)
-{
-    int x1;
-    int x2;
-    box_type_t *        next_rect;
-
-    next_rect = PIXREGION_TOP (region);
-
-    critical_if_fail (y1 < y2);
-    critical_if_fail (r1 != r1_end && r2 != r2_end);
-
-    do
-    {
-        x1 = MAX (r1->x1, r2->x1);
-        x2 = MIN (r1->x2, r2->x2);
-
-        /*
-	 * If there's any overlap between the two rectangles, add that
-	 * overlap to the new region.
-	 */
-        if (x1 < x2)
-	    NEWRECT (region, next_rect, x1, y1, x2, y2);
-
-        /*
-	 * Advance the pointer(s) with the leftmost right side, since the next
-	 * rectangle on that list may still overlap the other region's
-	 * current rectangle.
-	 */
-        if (r1->x2 == x2)
-        {
-            r1++;
-	}
-        if (r2->x2 == x2)
-        {
-            r2++;
-	}
-    }
-    while ((r1 != r1_end) && (r2 != r2_end));
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_intersect) (region_type_t *     new_reg,
-                     const region_type_t *        reg1,
-                     const region_type_t *        reg2)
-{
-    GOOD (reg1);
-    GOOD (reg2);
-    GOOD (new_reg);
-
-    /* check for trivial reject */
-    if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) ||
-        !EXTENTCHECK (&reg1->extents, &reg2->extents))
-    {
-        /* Covers about 20% of all cases */
-        FREE_DATA (new_reg);
-        new_reg->extents.x2 = new_reg->extents.x1;
-        new_reg->extents.y2 = new_reg->extents.y1;
-        if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
-        {
-            new_reg->data = pixman_broken_data;
-            return FALSE;
-	}
-        else
-	{
-	    new_reg->data = pixman_region_empty_data;
-	}
-    }
-    else if (!reg1->data && !reg2->data)
-    {
-        /* Covers about 80% of cases that aren't trivially rejected */
-        new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1);
-        new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1);
-        new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2);
-        new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2);
-
-        FREE_DATA (new_reg);
-
-	new_reg->data = (region_data_type_t *)NULL;
-    }
-    else if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
-    {
-        return PREFIX (_copy) (new_reg, reg1);
-    }
-    else if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
-    {
-        return PREFIX (_copy) (new_reg, reg2);
-    }
-    else if (reg1 == reg2)
-    {
-        return PREFIX (_copy) (new_reg, reg1);
-    }
-    else
-    {
-        /* General purpose intersection */
-
-        if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE))
-	    return FALSE;
-	
-        pixman_set_extents (new_reg);
-    }
-
-    GOOD (new_reg);
-    return(TRUE);
-}
-
-#define MERGERECT(r)							\
-    do									\
-    {									\
-        if (r->x1 <= x2)						\
-	{								\
-            /* Merge with current rectangle */				\
-            if (x2 < r->x2)						\
-		x2 = r->x2;						\
-	}								\
-	else								\
-	{								\
-            /* Add current rectangle, start new one */			\
-            NEWRECT (region, next_rect, x1, y1, x2, y2);		\
-            x1 = r->x1;							\
-            x2 = r->x2;							\
-	}								\
-        r++;								\
-    } while (0)
-
-/*======================================================================
- *	    Region Union
- *====================================================================*/
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_union_o --
- *	Handle an overlapping band for the union operation. Picks the
- *	left-most rectangle each time and merges it into the region.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *	region is overwritten.
- *	overlap is set to TRUE if any boxes overlap.
- *
- *-----------------------------------------------------------------------
- */
-static pixman_bool_t
-pixman_region_union_o (region_type_t *region,
-		       box_type_t *   r1,
-		       box_type_t *   r1_end,
-		       box_type_t *   r2,
-		       box_type_t *   r2_end,
-		       int            y1,
-		       int            y2)
-{
-    box_type_t *next_rect;
-    int x1;            /* left and right side of current union */
-    int x2;
-
-    critical_if_fail (y1 < y2);
-    critical_if_fail (r1 != r1_end && r2 != r2_end);
-
-    next_rect = PIXREGION_TOP (region);
-
-    /* Start off current rectangle */
-    if (r1->x1 < r2->x1)
-    {
-        x1 = r1->x1;
-        x2 = r1->x2;
-        r1++;
-    }
-    else
-    {
-        x1 = r2->x1;
-        x2 = r2->x2;
-        r2++;
-    }
-    while (r1 != r1_end && r2 != r2_end)
-    {
-        if (r1->x1 < r2->x1)
-	    MERGERECT (r1);
-	else
-	    MERGERECT (r2);
-    }
-
-    /* Finish off whoever (if any) is left */
-    if (r1 != r1_end)
-    {
-        do
-        {
-            MERGERECT (r1);
-	}
-        while (r1 != r1_end);
-    }
-    else if (r2 != r2_end)
-    {
-        do
-        {
-            MERGERECT (r2);
-	}
-        while (r2 != r2_end);
-    }
-
-    /* Add current rectangle */
-    NEWRECT (region, next_rect, x1, y1, x2, y2);
-
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX(_intersect_rect) (region_type_t *dest,
-			 const region_type_t *source,
-			 int x, int y,
-			 unsigned int width,
-			 unsigned int height)
-{
-    region_type_t region;
-
-    region.data = NULL;
-    region.extents.x1 = x;
-    region.extents.y1 = y;
-    region.extents.x2 = x + width;
-    region.extents.y2 = y + height;
-
-    return PREFIX(_intersect) (dest, source, &region);
-}
-
-/* Convenience function for performing union of region with a
- * single rectangle
- */
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_union_rect) (region_type_t *dest,
-                      const region_type_t *source,
-                      int            x,
-		      int            y,
-                      unsigned int   width,
-		      unsigned int   height)
-{
-    region_type_t region;
-
-    region.extents.x1 = x;
-    region.extents.y1 = y;
-    region.extents.x2 = x + width;
-    region.extents.y2 = y + height;
-
-    if (!GOOD_RECT (&region.extents))
-    {
-        if (BAD_RECT (&region.extents))
-            _pixman_log_error (FUNC, "Invalid rectangle passed");
-	return PREFIX (_copy) (dest, source);
-    }
-
-    region.data = NULL;
-
-    return PREFIX (_union) (dest, source, &region);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_union) (region_type_t *      new_reg,
-                 const region_type_t *reg1,
-                 const region_type_t *reg2)
-{
-    /* Return TRUE if some overlap
-     * between reg1, reg2
-     */
-    GOOD (reg1);
-    GOOD (reg2);
-    GOOD (new_reg);
-
-    /*  checks all the simple cases */
-
-    /*
-     * Region 1 and 2 are the same
-     */
-    if (reg1 == reg2)
-        return PREFIX (_copy) (new_reg, reg1);
-
-    /*
-     * Region 1 is empty
-     */
-    if (PIXREGION_NIL (reg1))
-    {
-        if (PIXREGION_NAR (reg1))
-	    return pixman_break (new_reg);
-
-        if (new_reg != reg2)
-	    return PREFIX (_copy) (new_reg, reg2);
-
-	return TRUE;
-    }
-
-    /*
-     * Region 2 is empty
-     */
-    if (PIXREGION_NIL (reg2))
-    {
-        if (PIXREGION_NAR (reg2))
-	    return pixman_break (new_reg);
-
-	if (new_reg != reg1)
-	    return PREFIX (_copy) (new_reg, reg1);
-
-	return TRUE;
-    }
-
-    /*
-     * Region 1 completely subsumes region 2
-     */
-    if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
-    {
-        if (new_reg != reg1)
-	    return PREFIX (_copy) (new_reg, reg1);
-
-	return TRUE;
-    }
-
-    /*
-     * Region 2 completely subsumes region 1
-     */
-    if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
-    {
-        if (new_reg != reg2)
-	    return PREFIX (_copy) (new_reg, reg2);
-
-	return TRUE;
-    }
-
-    if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE))
-	return FALSE;
-
-    new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1);
-    new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1);
-    new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2);
-    new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2);
-    
-    GOOD (new_reg);
-
-    return TRUE;
-}
-
-/*======================================================================
- *	    Batch Rectangle Union
- *====================================================================*/
-
-#define EXCHANGE_RECTS(a, b)	\
-    {                           \
-        box_type_t t;		\
-        t = rects[a];           \
-        rects[a] = rects[b];    \
-        rects[b] = t;           \
-    }
-
-static void
-quick_sort_rects (
-    box_type_t rects[],
-    int        numRects)
-{
-    int y1;
-    int x1;
-    int i, j;
-    box_type_t *r;
-
-    /* Always called with numRects > 1 */
-
-    do
-    {
-        if (numRects == 2)
-        {
-            if (rects[0].y1 > rects[1].y1 ||
-                (rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1))
-	    {
-		EXCHANGE_RECTS (0, 1);
-	    }
-
-            return;
-	}
-
-        /* Choose partition element, stick in location 0 */
-        EXCHANGE_RECTS (0, numRects >> 1);
-        y1 = rects[0].y1;
-        x1 = rects[0].x1;
-
-        /* Partition array */
-        i = 0;
-        j = numRects;
-
-        do
-        {
-            r = &(rects[i]);
-            do
-            {
-                r++;
-                i++;
-	    }
-	    while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)));
-
-	    r = &(rects[j]);
-            do
-            {
-                r--;
-                j--;
-	    }
-            while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1));
-	    
-            if (i < j)
-		EXCHANGE_RECTS (i, j);
-	}
-        while (i < j);
-
-        /* Move partition element back to middle */
-        EXCHANGE_RECTS (0, j);
-
-        /* Recurse */
-        if (numRects - j - 1 > 1)
-	    quick_sort_rects (&rects[j + 1], numRects - j - 1);
-
-        numRects = j;
-    }
-    while (numRects > 1);
-}
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_validate --
- *
- *      Take a ``region'' which is a non-y-x-banded random collection of
- *      rectangles, and compute a nice region which is the union of all the
- *      rectangles.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *      The passed-in ``region'' may be modified.
- *	overlap set to TRUE if any retangles overlapped,
- *      else FALSE;
- *
- * Strategy:
- *      Step 1. Sort the rectangles into ascending order with primary key y1
- *		and secondary key x1.
- *
- *      Step 2. Split the rectangles into the minimum number of proper y-x
- *		banded regions.  This may require horizontally merging
- *		rectangles, and vertically coalescing bands.  With any luck,
- *		this step in an identity transformation (ala the Box widget),
- *		or a coalescing into 1 box (ala Menus).
- *
- *	Step 3. Merge the separate regions down to a single region by calling
- *		pixman_region_union.  Maximize the work each pixman_region_union call does by using
- *		a binary merge.
- *
- *-----------------------------------------------------------------------
- */
-
-static pixman_bool_t
-validate (region_type_t * badreg)
-{
-    /* Descriptor for regions under construction  in Step 2. */
-    typedef struct
-    {
-        region_type_t reg;
-        int prev_band;
-        int cur_band;
-    } region_info_t;
-
-    region_info_t stack_regions[64];
-
-    int numRects;                   /* Original numRects for badreg	    */
-    region_info_t *ri;              /* Array of current regions		    */
-    int num_ri;                     /* Number of entries used in ri	    */
-    int size_ri;                    /* Number of entries available in ri    */
-    int i;                          /* Index into rects			    */
-    int j;                          /* Index into ri			    */
-    region_info_t *rit;             /* &ri[j]				    */
-    region_type_t *reg;             /* ri[j].reg			    */
-    box_type_t *box;                /* Current box in rects		    */
-    box_type_t *ri_box;             /* Last box in ri[j].reg		    */
-    region_type_t *hreg;            /* ri[j_half].reg			    */
-    pixman_bool_t ret = TRUE;
-
-    if (!badreg->data)
-    {
-        GOOD (badreg);
-        return TRUE;
-    }
-    
-    numRects = badreg->data->numRects;
-    if (!numRects)
-    {
-        if (PIXREGION_NAR (badreg))
-	    return FALSE;
-        GOOD (badreg);
-        return TRUE;
-    }
-    
-    if (badreg->extents.x1 < badreg->extents.x2)
-    {
-        if ((numRects) == 1)
-        {
-            FREE_DATA (badreg);
-            badreg->data = (region_data_type_t *) NULL;
-	}
-        else
-        {
-            DOWNSIZE (badreg, numRects);
-	}
-
-        GOOD (badreg);
-
-	return TRUE;
-    }
-
-    /* Step 1: Sort the rects array into ascending (y1, x1) order */
-    quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects);
-
-    /* Step 2: Scatter the sorted array into the minimum number of regions */
-
-    /* Set up the first region to be the first rectangle in badreg */
-    /* Note that step 2 code will never overflow the ri[0].reg rects array */
-    ri = stack_regions;
-    size_ri = sizeof (stack_regions) / sizeof (stack_regions[0]);
-    num_ri = 1;
-    ri[0].prev_band = 0;
-    ri[0].cur_band = 0;
-    ri[0].reg = *badreg;
-    box = PIXREGION_BOXPTR (&ri[0].reg);
-    ri[0].reg.extents = *box;
-    ri[0].reg.data->numRects = 1;
-    badreg->extents = *pixman_region_empty_box;
-    badreg->data = pixman_region_empty_data;
-
-    /* Now scatter rectangles into the minimum set of valid regions.  If the
-     * next rectangle to be added to a region would force an existing rectangle
-     * in the region to be split up in order to maintain y-x banding, just
-     * forget it.  Try the next region.  If it doesn't fit cleanly into any
-     * region, make a new one.
-     */
-
-    for (i = numRects; --i > 0;)
-    {
-        box++;
-        /* Look for a region to append box to */
-        for (j = num_ri, rit = ri; --j >= 0; rit++)
-        {
-            reg = &rit->reg;
-            ri_box = PIXREGION_END (reg);
-
-            if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2)
-            {
-                /* box is in same band as ri_box.  Merge or append it */
-                if (box->x1 <= ri_box->x2)
-                {
-                    /* Merge it with ri_box */
-                    if (box->x2 > ri_box->x2)
-			ri_box->x2 = box->x2;
-		}
-                else
-                {
-                    RECTALLOC_BAIL (reg, 1, bail);
-                    *PIXREGION_TOP (reg) = *box;
-                    reg->data->numRects++;
-		}
-		
-                goto next_rect;   /* So sue me */
-	    }
-            else if (box->y1 >= ri_box->y2)
-            {
-                /* Put box into new band */
-                if (reg->extents.x2 < ri_box->x2)
-		    reg->extents.x2 = ri_box->x2;
-		
-                if (reg->extents.x1 > box->x1)
-		    reg->extents.x1 = box->x1;
-		
-                COALESCE (reg, rit->prev_band, rit->cur_band);
-                rit->cur_band = reg->data->numRects;
-                RECTALLOC_BAIL (reg, 1, bail);
-                *PIXREGION_TOP (reg) = *box;
-                reg->data->numRects++;
-
-                goto next_rect;
-	    }
-            /* Well, this region was inappropriate.  Try the next one. */
-	} /* for j */
-
-        /* Uh-oh.  No regions were appropriate.  Create a new one. */
-        if (size_ri == num_ri)
-        {
-            size_t data_size;
-
-            /* Oops, allocate space for new region information */
-            size_ri <<= 1;
-
-            data_size = size_ri * sizeof(region_info_t);
-            if (data_size / size_ri != sizeof(region_info_t))
-		goto bail;
-
-            if (ri == stack_regions)
-            {
-                rit = malloc (data_size);
-                if (!rit)
-		    goto bail;
-                memcpy (rit, ri, num_ri * sizeof (region_info_t));
-	    }
-            else
-            {
-                rit = (region_info_t *) realloc (ri, data_size);
-                if (!rit)
-		    goto bail;
-	    }
-            ri = rit;
-            rit = &ri[num_ri];
-	}
-        num_ri++;
-        rit->prev_band = 0;
-        rit->cur_band = 0;
-        rit->reg.extents = *box;
-        rit->reg.data = (region_data_type_t *)NULL;
-
-	/* MUST force allocation */
-        if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri))
-	    goto bail;
-	
-    next_rect: ;
-    } /* for i */
-
-    /* Make a final pass over each region in order to COALESCE and set
-     * extents.x2 and extents.y2
-     */
-    for (j = num_ri, rit = ri; --j >= 0; rit++)
-    {
-        reg = &rit->reg;
-        ri_box = PIXREGION_END (reg);
-        reg->extents.y2 = ri_box->y2;
-
-        if (reg->extents.x2 < ri_box->x2)
-	    reg->extents.x2 = ri_box->x2;
-	
-        COALESCE (reg, rit->prev_band, rit->cur_band);
-
-	if (reg->data->numRects == 1) /* keep unions happy below */
-        {
-            FREE_DATA (reg);
-            reg->data = (region_data_type_t *)NULL;
-	}
-    }
-
-    /* Step 3: Union all regions into a single region */
-    while (num_ri > 1)
-    {
-        int half = num_ri / 2;
-        for (j = num_ri & 1; j < (half + (num_ri & 1)); j++)
-        {
-            reg = &ri[j].reg;
-            hreg = &ri[j + half].reg;
-
-            if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE))
-		ret = FALSE;
-
-            if (hreg->extents.x1 < reg->extents.x1)
-		reg->extents.x1 = hreg->extents.x1;
-
-            if (hreg->extents.y1 < reg->extents.y1)
-		reg->extents.y1 = hreg->extents.y1;
-
-            if (hreg->extents.x2 > reg->extents.x2)
-		reg->extents.x2 = hreg->extents.x2;
-
-            if (hreg->extents.y2 > reg->extents.y2)
-		reg->extents.y2 = hreg->extents.y2;
-
-            FREE_DATA (hreg);
-	}
-
-        num_ri -= half;
-
-	if (!ret)
-	    goto bail;
-    }
-
-    *badreg = ri[0].reg;
-
-    if (ri != stack_regions)
-	free (ri);
-
-    GOOD (badreg);
-    return ret;
-
-bail:
-    for (i = 0; i < num_ri; i++)
-	FREE_DATA (&ri[i].reg);
-
-    if (ri != stack_regions)
-	free (ri);
-
-    return pixman_break (badreg);
-}
-
-/*======================================================================
- *                Region Subtraction
- *====================================================================*/
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_subtract_o --
- *	Overlapping band subtraction. x1 is the left-most point not yet
- *	checked.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *	region may have rectangles added to it.
- *
- *-----------------------------------------------------------------------
- */
-/*ARGSUSED*/
-static pixman_bool_t
-pixman_region_subtract_o (region_type_t * region,
-			  box_type_t *    r1,
-			  box_type_t *    r1_end,
-			  box_type_t *    r2,
-			  box_type_t *    r2_end,
-			  int             y1,
-			  int             y2)
-{
-    box_type_t *        next_rect;
-    int x1;
-
-    x1 = r1->x1;
-
-    critical_if_fail (y1 < y2);
-    critical_if_fail (r1 != r1_end && r2 != r2_end);
-
-    next_rect = PIXREGION_TOP (region);
-
-    do
-    {
-        if (r2->x2 <= x1)
-        {
-            /*
-	     * Subtrahend entirely to left of minuend: go to next subtrahend.
-	     */
-            r2++;
-	}
-        else if (r2->x1 <= x1)
-        {
-            /*
-	     * Subtrahend precedes minuend: nuke left edge of minuend.
-	     */
-            x1 = r2->x2;
-            if (x1 >= r1->x2)
-            {
-                /*
-		 * Minuend completely covered: advance to next minuend and
-		 * reset left fence to edge of new minuend.
-		 */
-                r1++;
-                if (r1 != r1_end)
-		    x1 = r1->x1;
-	    }
-            else
-            {
-                /*
-		 * Subtrahend now used up since it doesn't extend beyond
-		 * minuend
-		 */
-                r2++;
-	    }
-	}
-        else if (r2->x1 < r1->x2)
-        {
-            /*
-	     * Left part of subtrahend covers part of minuend: add uncovered
-	     * part of minuend to region and skip to next subtrahend.
-	     */
-            critical_if_fail (x1 < r2->x1);
-            NEWRECT (region, next_rect, x1, y1, r2->x1, y2);
-
-            x1 = r2->x2;
-            if (x1 >= r1->x2)
-            {
-                /*
-		 * Minuend used up: advance to new...
-		 */
-                r1++;
-                if (r1 != r1_end)
-		    x1 = r1->x1;
-	    }
-            else
-            {
-                /*
-		 * Subtrahend used up
-		 */
-                r2++;
-	    }
-	}
-        else
-        {
-            /*
-	     * Minuend used up: add any remaining piece before advancing.
-	     */
-            if (r1->x2 > x1)
-		NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
-
-            r1++;
-
-	    if (r1 != r1_end)
-		x1 = r1->x1;
-	}
-    }
-    while ((r1 != r1_end) && (r2 != r2_end));
-
-    /*
-     * Add remaining minuend rectangles to region.
-     */
-    while (r1 != r1_end)
-    {
-        critical_if_fail (x1 < r1->x2);
-
-        NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
-
-        r1++;
-        if (r1 != r1_end)
-	    x1 = r1->x1;
-    }
-    return TRUE;
-}
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_subtract --
- *	Subtract reg_s from reg_m and leave the result in reg_d.
- *	S stands for subtrahend, M for minuend and D for difference.
- *
- * Results:
- *	TRUE if successful.
- *
- * Side Effects:
- *	reg_d is overwritten.
- *
- *-----------------------------------------------------------------------
- */
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_subtract) (region_type_t *      reg_d,
-                    const region_type_t *reg_m,
-                    const region_type_t *reg_s)
-{
-    GOOD (reg_m);
-    GOOD (reg_s);
-    GOOD (reg_d);
-    
-    /* check for trivial rejects */
-    if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) ||
-        !EXTENTCHECK (&reg_m->extents, &reg_s->extents))
-    {
-        if (PIXREGION_NAR (reg_s))
-	    return pixman_break (reg_d);
-	
-        return PREFIX (_copy) (reg_d, reg_m);
-    }
-    else if (reg_m == reg_s)
-    {
-        FREE_DATA (reg_d);
-        reg_d->extents.x2 = reg_d->extents.x1;
-        reg_d->extents.y2 = reg_d->extents.y1;
-        reg_d->data = pixman_region_empty_data;
-
-        return TRUE;
-    }
-
-    /* Add those rectangles in region 1 that aren't in region 2,
-       do yucky subtraction for overlaps, and
-       just throw away rectangles in region 2 that aren't in region 1 */
-    if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
-	return FALSE;
-
-    /*
-     * Can't alter reg_d's extents before we call pixman_op because
-     * it might be one of the source regions and pixman_op depends
-     * on the extents of those regions being unaltered. Besides, this
-     * way there's no checking against rectangles that will be nuked
-     * due to coalescing, so we have to examine fewer rectangles.
-     */
-    pixman_set_extents (reg_d);
-    GOOD (reg_d);
-    return TRUE;
-}
-
-/*======================================================================
- *	    Region Inversion
- *====================================================================*/
-
-/*-
- *-----------------------------------------------------------------------
- * pixman_region_inverse --
- *	Take a region and a box and return a region that is everything
- *	in the box but not in the region. The careful reader will note
- *	that this is the same as subtracting the region from the box...
- *
- * Results:
- *	TRUE.
- *
- * Side Effects:
- *	new_reg is overwritten.
- *
- *-----------------------------------------------------------------------
- */
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_inverse) (region_type_t *      new_reg,  /* Destination region */
-		   const region_type_t *reg1,     /* Region to invert */
-		   const box_type_t *   inv_rect) /* Bounding box for inversion */
-{
-    region_type_t inv_reg; /* Quick and dirty region made from the
-			    * bounding box */
-    GOOD (reg1);
-    GOOD (new_reg);
-    
-    /* check for trivial rejects */
-    if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, &reg1->extents))
-    {
-        if (PIXREGION_NAR (reg1))
-	    return pixman_break (new_reg);
-	
-        new_reg->extents = *inv_rect;
-        FREE_DATA (new_reg);
-        new_reg->data = (region_data_type_t *)NULL;
-	
-        return TRUE;
-    }
-
-    /* Add those rectangles in region 1 that aren't in region 2,
-     * do yucky subtraction for overlaps, and
-     * just throw away rectangles in region 2 that aren't in region 1
-     */
-    inv_reg.extents = *inv_rect;
-    inv_reg.data = (region_data_type_t *)NULL;
-    if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE))
-	return FALSE;
-
-    /*
-     * Can't alter new_reg's extents before we call pixman_op because
-     * it might be one of the source regions and pixman_op depends
-     * on the extents of those regions being unaltered. Besides, this
-     * way there's no checking against rectangles that will be nuked
-     * due to coalescing, so we have to examine fewer rectangles.
-     */
-    pixman_set_extents (new_reg);
-    GOOD (new_reg);
-    return TRUE;
-}
-
-/* In time O(log n), locate the first box whose y2 is greater than y.
- * Return @end if no such box exists.
- */
-static box_type_t *
-find_box_for_y (box_type_t *begin, box_type_t *end, int y)
-{
-    box_type_t *mid;
-
-    if (end == begin)
-	return end;
-
-    if (end - begin == 1)
-    {
-	if (begin->y2 > y)
-	    return begin;
-	else
-	    return end;
-    }
-
-    mid = begin + (end - begin) / 2;
-    if (mid->y2 > y)
-    {
-	/* If no box is found in [begin, mid], the function
-	 * will return @mid, which is then known to be the
-	 * correct answer.
-	 */
-	return find_box_for_y (begin, mid, y);
-    }
-    else
-    {
-	return find_box_for_y (mid, end, y);
-    }
-}
-
-/*
- *   rect_in(region, rect)
- *   This routine takes a pointer to a region and a pointer to a box
- *   and determines if the box is outside/inside/partly inside the region.
- *
- *   The idea is to travel through the list of rectangles trying to cover the
- *   passed box with them. Anytime a piece of the rectangle isn't covered
- *   by a band of rectangles, part_out is set TRUE. Any time a rectangle in
- *   the region covers part of the box, part_in is set TRUE. The process ends
- *   when either the box has been completely covered (we reached a band that
- *   doesn't overlap the box, part_in is TRUE and part_out is false), the
- *   box has been partially covered (part_in == part_out == TRUE -- because of
- *   the banding, the first time this is true we know the box is only
- *   partially in the region) or is outside the region (we reached a band
- *   that doesn't overlap the box at all and part_in is false)
- */
-PIXMAN_EXPORT pixman_region_overlap_t
-PREFIX (_contains_rectangle) (const region_type_t *  region,
-			      const box_type_t *     prect)
-{
-    box_type_t *     pbox;
-    box_type_t *     pbox_end;
-    int part_in, part_out;
-    int numRects;
-    int x, y;
-
-    GOOD (region);
-
-    numRects = PIXREGION_NUMRECTS (region);
-
-    /* useful optimization */
-    if (!numRects || !EXTENTCHECK (&region->extents, prect))
-	return(PIXMAN_REGION_OUT);
-
-    if (numRects == 1)
-    {
-        /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
-        if (SUBSUMES (&region->extents, prect))
-	    return(PIXMAN_REGION_IN);
-        else
-	    return(PIXMAN_REGION_PART);
-    }
-
-    part_out = FALSE;
-    part_in = FALSE;
-
-    /* (x,y) starts at upper left of rect, moving to the right and down */
-    x = prect->x1;
-    y = prect->y1;
-
-    /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */
-    for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
-	 pbox != pbox_end;
-	 pbox++)
-    {
-	/* getting up to speed or skipping remainder of band */
-	if (pbox->y2 <= y)
-	{
-	    if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end)
-		break;
-	}
-
-        if (pbox->y1 > y)
-        {
-            part_out = TRUE;     /* missed part of rectangle above */
-            if (part_in || (pbox->y1 >= prect->y2))
-		break;
-            y = pbox->y1;       /* x guaranteed to be == prect->x1 */
-	}
-
-        if (pbox->x2 <= x)
-	    continue;           /* not far enough over yet */
-
-        if (pbox->x1 > x)
-        {
-            part_out = TRUE;     /* missed part of rectangle to left */
-            if (part_in)
-		break;
-	}
-
-        if (pbox->x1 < prect->x2)
-        {
-            part_in = TRUE;      /* definitely overlap */
-            if (part_out)
-		break;
-	}
-
-        if (pbox->x2 >= prect->x2)
-        {
-            y = pbox->y2;       /* finished with this band */
-            if (y >= prect->y2)
-		break;
-            x = prect->x1;      /* reset x out to left again */
-	}
-        else
-        {
-            /*
-	     * Because boxes in a band are maximal width, if the first box
-	     * to overlap the rectangle doesn't completely cover it in that
-	     * band, the rectangle must be partially out, since some of it
-	     * will be uncovered in that band. part_in will have been set true
-	     * by now...
-	     */
-            part_out = TRUE;
-            break;
-	}
-    }
-
-    if (part_in)
-    {
-        if (y < prect->y2)
-	    return PIXMAN_REGION_PART;
-        else
-	    return PIXMAN_REGION_IN;
-    }
-    else
-    {
-        return PIXMAN_REGION_OUT;
-    }
-}
-
-/* PREFIX(_translate) (region, x, y)
- * translates in place
- */
-
-PIXMAN_EXPORT void
-PREFIX (_translate) (region_type_t *region, int x, int y)
-{
-    overflow_int_t x1, x2, y1, y2;
-    int nbox;
-    box_type_t * pbox;
-
-    GOOD (region);
-    region->extents.x1 = x1 = region->extents.x1 + x;
-    region->extents.y1 = y1 = region->extents.y1 + y;
-    region->extents.x2 = x2 = region->extents.x2 + x;
-    region->extents.y2 = y2 = region->extents.y2 + y;
-    
-    if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0)
-    {
-        if (region->data && (nbox = region->data->numRects))
-        {
-            for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
-            {
-                pbox->x1 += x;
-                pbox->y1 += y;
-                pbox->x2 += x;
-                pbox->y2 += y;
-	    }
-	}
-        return;
-    }
-
-    if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0)
-    {
-        region->extents.x2 = region->extents.x1;
-        region->extents.y2 = region->extents.y1;
-        FREE_DATA (region);
-        region->data = pixman_region_empty_data;
-        return;
-    }
-
-    if (x1 < PIXMAN_REGION_MIN)
-	region->extents.x1 = PIXMAN_REGION_MIN;
-    else if (x2 > PIXMAN_REGION_MAX)
-	region->extents.x2 = PIXMAN_REGION_MAX;
-
-    if (y1 < PIXMAN_REGION_MIN)
-	region->extents.y1 = PIXMAN_REGION_MIN;
-    else if (y2 > PIXMAN_REGION_MAX)
-	region->extents.y2 = PIXMAN_REGION_MAX;
-
-    if (region->data && (nbox = region->data->numRects))
-    {
-        box_type_t * pbox_out;
-
-        for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
-        {
-            pbox_out->x1 = x1 = pbox->x1 + x;
-            pbox_out->y1 = y1 = pbox->y1 + y;
-            pbox_out->x2 = x2 = pbox->x2 + x;
-            pbox_out->y2 = y2 = pbox->y2 + y;
-
-            if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) |
-                 (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0)
-            {
-                region->data->numRects--;
-                continue;
-	    }
-
-            if (x1 < PIXMAN_REGION_MIN)
-		pbox_out->x1 = PIXMAN_REGION_MIN;
-            else if (x2 > PIXMAN_REGION_MAX)
-		pbox_out->x2 = PIXMAN_REGION_MAX;
-
-            if (y1 < PIXMAN_REGION_MIN)
-		pbox_out->y1 = PIXMAN_REGION_MIN;
-            else if (y2 > PIXMAN_REGION_MAX)
-		pbox_out->y2 = PIXMAN_REGION_MAX;
-
-            pbox_out++;
-	}
-
-        if (pbox_out != pbox)
-        {
-            if (region->data->numRects == 1)
-            {
-                region->extents = *PIXREGION_BOXPTR (region);
-                FREE_DATA (region);
-                region->data = (region_data_type_t *)NULL;
-	    }
-            else
-	    {
-		pixman_set_extents (region);
-	    }
-	}
-    }
-
-    GOOD (region);
-}
-
-PIXMAN_EXPORT void
-PREFIX (_reset) (region_type_t *region, const box_type_t *box)
-{
-    GOOD (region);
-
-    critical_if_fail (GOOD_RECT (box));
-
-    region->extents = *box;
-
-    FREE_DATA (region);
-
-    region->data = NULL;
-}
-
-PIXMAN_EXPORT void
-PREFIX (_clear) (region_type_t *region)
-{
-    GOOD (region);
-    FREE_DATA (region);
-
-    region->extents = *pixman_region_empty_box;
-    region->data = pixman_region_empty_data;
-}
-
-/* box is "return" value */
-PIXMAN_EXPORT int
-PREFIX (_contains_point) (const region_type_t * region,
-                          int x, int y,
-                          box_type_t * box)
-{
-    box_type_t *pbox, *pbox_end;
-    int numRects;
-
-    GOOD (region);
-    numRects = PIXREGION_NUMRECTS (region);
-
-    if (!numRects || !INBOX (&region->extents, x, y))
-	return(FALSE);
-
-    if (numRects == 1)
-    {
-        if (box)
-	    *box = region->extents;
-
-        return(TRUE);
-    }
-
-    pbox = PIXREGION_BOXPTR (region);
-    pbox_end = pbox + numRects;
-
-    pbox = find_box_for_y (pbox, pbox_end, y);
-
-    for (;pbox != pbox_end; pbox++)
-    {
-        if ((y < pbox->y1) || (x < pbox->x1))
-	    break;              /* missed it */
-
-        if (x >= pbox->x2)
-	    continue;           /* not there yet */
-
-        if (box)
-	    *box = *pbox;
-
-        return(TRUE);
-    }
-
-    return(FALSE);
-}
-
-PIXMAN_EXPORT int
-PREFIX (_empty) (const region_type_t * region)
-{
-    GOOD (region);
-
-    return(PIXREGION_NIL (region));
-}
-
-PIXMAN_EXPORT int
-PREFIX (_not_empty) (const region_type_t * region)
-{
-    GOOD (region);
-
-    return(!PIXREGION_NIL (region));
-}
-
-PIXMAN_EXPORT box_type_t *
-PREFIX (_extents) (const region_type_t * region)
-{
-    GOOD (region);
-
-    return(box_type_t *)(&region->extents);
-}
-
-/*
- * Clip a list of scanlines to a region.  The caller has allocated the
- * space.  FSorted is non-zero if the scanline origins are in ascending order.
- *
- * returns the number of new, clipped scanlines.
- */
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_selfcheck) (region_type_t *reg)
-{
-    int i, numRects;
-
-    if ((reg->extents.x1 > reg->extents.x2) ||
-        (reg->extents.y1 > reg->extents.y2))
-    {
-	return FALSE;
-    }
-
-    numRects = PIXREGION_NUMRECTS (reg);
-    if (!numRects)
-    {
-	return ((reg->extents.x1 == reg->extents.x2) &&
-	        (reg->extents.y1 == reg->extents.y2) &&
-	        (reg->data->size || (reg->data == pixman_region_empty_data)));
-    }
-    else if (numRects == 1)
-    {
-	return (!reg->data);
-    }
-    else
-    {
-        box_type_t * pbox_p, * pbox_n;
-        box_type_t box;
-
-        pbox_p = PIXREGION_RECTS (reg);
-        box = *pbox_p;
-        box.y2 = pbox_p[numRects - 1].y2;
-        pbox_n = pbox_p + 1;
-
-        for (i = numRects; --i > 0; pbox_p++, pbox_n++)
-        {
-            if ((pbox_n->x1 >= pbox_n->x2) ||
-                (pbox_n->y1 >= pbox_n->y2))
-	    {
-		return FALSE;
-	    }
-
-            if (pbox_n->x1 < box.x1)
-		box.x1 = pbox_n->x1;
-	    
-            if (pbox_n->x2 > box.x2)
-		box.x2 = pbox_n->x2;
-	    
-            if ((pbox_n->y1 < pbox_p->y1) ||
-                ((pbox_n->y1 == pbox_p->y1) &&
-                 ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2))))
-	    {
-		return FALSE;
-	    }
-	}
-
-        return ((box.x1 == reg->extents.x1) &&
-                (box.x2 == reg->extents.x2) &&
-                (box.y1 == reg->extents.y1) &&
-                (box.y2 == reg->extents.y2));
-    }
-}
-
-PIXMAN_EXPORT pixman_bool_t
-PREFIX (_init_rects) (region_type_t *region,
-                      const box_type_t *boxes, int count)
-{
-    box_type_t *rects;
-    int displacement;
-    int i;
-
-    /* if it's 1, then we just want to set the extents, so call
-     * the existing method. */
-    if (count == 1)
-    {
-        PREFIX (_init_rect) (region,
-                             boxes[0].x1,
-                             boxes[0].y1,
-                             boxes[0].x2 - boxes[0].x1,
-                             boxes[0].y2 - boxes[0].y1);
-        return TRUE;
-    }
-
-    PREFIX (_init) (region);
-
-    /* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is
-     * a special case, and causing pixman_rect_alloc would cause
-     * us to leak memory (because the 0-rect case should be the
-     * static pixman_region_empty_data data).
-     */
-    if (count == 0)
-	return TRUE;
-
-    if (!pixman_rect_alloc (region, count))
-	return FALSE;
-
-    rects = PIXREGION_RECTS (region);
-
-    /* Copy in the rects */
-    memcpy (rects, boxes, sizeof(box_type_t) * count);
-    region->data->numRects = count;
-
-    /* Eliminate empty and malformed rectangles */
-    displacement = 0;
-
-    for (i = 0; i < count; ++i)
-    {
-        box_type_t *box = &rects[i];
-
-        if (box->x1 >= box->x2 || box->y1 >= box->y2)
-	    displacement++;
-        else if (displacement)
-	    rects[i - displacement] = rects[i];
-    }
-
-    region->data->numRects -= displacement;
-
-    /* If eliminating empty rectangles caused there
-     * to be only 0 or 1 rectangles, deal with that.
-     */
-    if (region->data->numRects == 0)
-    {
-        FREE_DATA (region);
-        PREFIX (_init) (region);
-
-        return TRUE;
-    }
-
-    if (region->data->numRects == 1)
-    {
-        region->extents = rects[0];
-
-        FREE_DATA (region);
-        region->data = NULL;
-
-        GOOD (region);
-
-        return TRUE;
-    }
-
-    /* Validate */
-    region->extents.x1 = region->extents.x2 = 0;
-
-    return validate (region);
-}
-
-#define READ(_ptr) (*(_ptr))
-
-static inline box_type_t *
-bitmap_addrect (region_type_t *reg,
-                box_type_t *r,
-                box_type_t **first_rect,
-                int rx1, int ry1,
-                int rx2, int ry2)
-{
-    if ((rx1 < rx2) && (ry1 < ry2) &&
-	(!(reg->data->numRects &&
-	   ((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) &&
-	   ((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2))))
-    {
-	if (reg->data->numRects == reg->data->size)
-	{
-	    if (!pixman_rect_alloc (reg, 1))
-		return NULL;
-	    *first_rect = PIXREGION_BOXPTR(reg);
-	    r = *first_rect + reg->data->numRects;
-	}
-	r->x1 = rx1;
-	r->y1 = ry1;
-	r->x2 = rx2;
-	r->y2 = ry2;
-	reg->data->numRects++;
-	if (r->x1 < reg->extents.x1)
-	    reg->extents.x1 = r->x1;
-	if (r->x2 > reg->extents.x2)
-	    reg->extents.x2 = r->x2;
-	r++;
-    }
-    return r;
-}
-
-/* Convert bitmap clip mask into clipping region.
- * First, goes through each line and makes boxes by noting the transitions
- * from 0 to 1 and 1 to 0.
- * Then it coalesces the current line with the previous if they have boxes
- * at the same X coordinates.
- * Stride is in number of uint32_t per line.
- */
-PIXMAN_EXPORT void
-PREFIX (_init_from_image) (region_type_t *region,
-                           pixman_image_t *image)
-{
-    uint32_t mask0 = 0xffffffff & ~SCREEN_SHIFT_RIGHT(0xffffffff, 1);
-    box_type_t *first_rect, *rects, *prect_line_start;
-    box_type_t *old_rect, *new_rect;
-    uint32_t *pw, w, *pw_line, *pw_line_end;
-    int	irect_prev_start, irect_line_start;
-    int	h, base, rx1 = 0, crects;
-    int	ib;
-    pixman_bool_t in_box, same;
-    int width, height, stride;
-
-    PREFIX(_init) (region);
-
-    critical_if_fail (region->data);
-
-    return_if_fail (image->type == BITS);
-    return_if_fail (image->bits.format == PIXMAN_a1);
-
-    pw_line = pixman_image_get_data (image);
-    width = pixman_image_get_width (image);
-    height = pixman_image_get_height (image);
-    stride = pixman_image_get_stride (image) / 4;
-
-    first_rect = PIXREGION_BOXPTR(region);
-    rects = first_rect;
-
-    region->extents.x1 = width - 1;
-    region->extents.x2 = 0;
-    irect_prev_start = -1;
-    for (h = 0; h < height; h++)
-    {
-        pw = pw_line;
-        pw_line += stride;
-        irect_line_start = rects - first_rect;
-
-        /* If the Screen left most bit of the word is set, we're starting in
-         * a box */
-        if (READ(pw) & mask0)
-        {
-            in_box = TRUE;
-            rx1 = 0;
-        }
-        else
-        {
-            in_box = FALSE;
-        }
-
-        /* Process all words which are fully in the pixmap */
-        pw_line_end = pw + (width >> 5);
-        for (base = 0; pw < pw_line_end; base += 32)
-        {
-            w = READ(pw++);
-            if (in_box)
-            {
-                if (!~w)
-                    continue;
-            }
-            else
-            {
-                if (!w)
-                    continue;
-            }
-            for (ib = 0; ib < 32; ib++)
-            {
-                /* If the Screen left most bit of the word is set, we're
-                 * starting a box */
-                if (w & mask0)
-                {
-                    if (!in_box)
-                    {
-                        rx1 = base + ib;
-                        /* start new box */
-                        in_box = TRUE;
-                    }
-                }
-                else
-                {
-                    if (in_box)
-                    {
-                        /* end box */
-                        rects = bitmap_addrect (region, rects, &first_rect,
-                                                rx1, h, base + ib, h + 1);
-                        if (rects == NULL)
-                            goto error;
-                        in_box = FALSE;
-                    }
-                }
-                /* Shift the word VISUALLY left one. */
-                w = SCREEN_SHIFT_LEFT(w, 1);
-            }
-        }
-
-        if (width & 31)
-        {
-            /* Process final partial word on line */
-             w = READ(pw++);
-            for (ib = 0; ib < (width & 31); ib++)
-            {
-                /* If the Screen left most bit of the word is set, we're
-                 * starting a box */
-                if (w & mask0)
-                {
-                    if (!in_box)
-                    {
-                        rx1 = base + ib;
-                        /* start new box */
-                        in_box = TRUE;
-                    }
-                }
-                else
-                {
-                    if (in_box)
-                    {
-                        /* end box */
-                        rects = bitmap_addrect(region, rects, &first_rect,
-					       rx1, h, base + ib, h + 1);
-			if (rects == NULL)
-			    goto error;
-                        in_box = FALSE;
-                    }
-                }
-                /* Shift the word VISUALLY left one. */
-                w = SCREEN_SHIFT_LEFT(w, 1);
-            }
-        }
-        /* If scanline ended with last bit set, end the box */
-        if (in_box)
-        {
-            rects = bitmap_addrect(region, rects, &first_rect,
-				   rx1, h, base + (width & 31), h + 1);
-	    if (rects == NULL)
-		goto error;
-        }
-        /* if all rectangles on this line have the same x-coords as
-         * those on the previous line, then add 1 to all the previous  y2s and
-         * throw away all the rectangles from this line
-         */
-        same = FALSE;
-        if (irect_prev_start != -1)
-        {
-            crects = irect_line_start - irect_prev_start;
-            if (crects != 0 &&
-                crects == ((rects - first_rect) - irect_line_start))
-            {
-                old_rect = first_rect + irect_prev_start;
-                new_rect = prect_line_start = first_rect + irect_line_start;
-                same = TRUE;
-                while (old_rect < prect_line_start)
-                {
-                    if ((old_rect->x1 != new_rect->x1) ||
-                        (old_rect->x2 != new_rect->x2))
-                    {
-                          same = FALSE;
-                          break;
-                    }
-                    old_rect++;
-                    new_rect++;
-                }
-                if (same)
-                {
-                    old_rect = first_rect + irect_prev_start;
-                    while (old_rect < prect_line_start)
-                    {
-                        old_rect->y2 += 1;
-                        old_rect++;
-                    }
-                    rects -= crects;
-                    region->data->numRects -= crects;
-                }
-            }
-        }
-        if(!same)
-            irect_prev_start = irect_line_start;
-    }
-    if (!region->data->numRects)
-    {
-        region->extents.x1 = region->extents.x2 = 0;
-    }
-    else
-    {
-        region->extents.y1 = PIXREGION_BOXPTR(region)->y1;
-        region->extents.y2 = PIXREGION_END(region)->y2;
-        if (region->data->numRects == 1)
-        {
-            free (region->data);
-            region->data = NULL;
-        }
-    }
-
- error:
-    return;
-}
diff --git a/vendor/pixman/pixman/pixman-region16.c b/vendor/pixman/pixman/pixman-region16.c
deleted file mode 100644
index da4719e7a..000000000
--- a/vendor/pixman/pixman/pixman-region16.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright © 2008 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without
- * fee, provided that the above copyright notice appear in all copies
- * and that both that copyright notice and this permission notice
- * appear in supporting documentation, and that the name of
- * Red Hat, Inc. not be used in advertising or publicity pertaining to
- * distribution of the software without specific, written prior
- * permission. Red Hat, Inc. makes no representations about the
- * suitability of this software for any purpose.  It is provided "as
- * is" without express or implied warranty.
- *
- * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL,
- * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
- * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
- * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Soren Sandmann <sandmann@redhat.com>
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#undef PIXMAN_DISABLE_DEPRECATED
-
-#include "pixman-private.h"
-
-#include <stdlib.h>
-
-typedef pixman_box16_t		box_type_t;
-typedef pixman_region16_data_t	region_data_type_t;
-typedef pixman_region16_t	region_type_t;
-typedef int32_t                 overflow_int_t;
-
-typedef struct {
-    int x, y;
-} point_type_t;
-
-#define PREFIX(x) pixman_region##x
-
-#define PIXMAN_REGION_MAX INT16_MAX
-#define PIXMAN_REGION_MIN INT16_MIN
-
-#include "pixman-region.c"
-
-/* This function exists only to make it possible to preserve the X ABI -
- * it should go away at first opportunity.
- *
- * The problem is that the X ABI exports the three structs and has used
- * them through macros. So the X server calls this function with
- * the addresses of those structs which makes the existing code continue to
- * work.
- */
-PIXMAN_EXPORT void
-pixman_region_set_static_pointers (pixman_box16_t *empty_box,
-				   pixman_region16_data_t *empty_data,
-				   pixman_region16_data_t *broken_data)
-{
-    pixman_region_empty_box = empty_box;
-    pixman_region_empty_data = empty_data;
-    pixman_broken_data = broken_data;
-}
diff --git a/vendor/pixman/pixman/pixman-region32.c b/vendor/pixman/pixman/pixman-region32.c
deleted file mode 100644
index 68b456bf3..000000000
--- a/vendor/pixman/pixman/pixman-region32.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright © 2008 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without
- * fee, provided that the above copyright notice appear in all copies
- * and that both that copyright notice and this permission notice
- * appear in supporting documentation, and that the name of
- * Red Hat, Inc. not be used in advertising or publicity pertaining to
- * distribution of the software without specific, written prior
- * permission. Red Hat, Inc. makes no representations about the
- * suitability of this software for any purpose.  It is provided "as
- * is" without express or implied warranty.
- *
- * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL,
- * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
- * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
- * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Soren Sandmann <sandmann@redhat.com>
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-#include <stdlib.h>
-
-typedef pixman_box32_t		box_type_t;
-typedef pixman_region32_data_t	region_data_type_t;
-typedef pixman_region32_t	region_type_t;
-typedef int64_t                 overflow_int_t;
-
-typedef struct {
-    int x, y;
-} point_type_t;
-
-#define PREFIX(x) pixman_region32##x
-
-#define PIXMAN_REGION_MAX INT32_MAX
-#define PIXMAN_REGION_MIN INT32_MIN
-
-#include "pixman-region.c"
diff --git a/vendor/pixman/pixman/pixman-solid-fill.c b/vendor/pixman/pixman/pixman-solid-fill.c
deleted file mode 100644
index 44f4de07a..000000000
--- a/vendor/pixman/pixman/pixman-solid-fill.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007, 2009 Red Hat, Inc.
- * Copyright © 2009 Soren Sandmann
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-
-static uint32_t
-color_to_uint32 (const pixman_color_t *color)
-{
-    return
-        ((unsigned int) color->alpha >> 8 << 24) |
-        ((unsigned int) color->red >> 8 << 16) |
-        ((unsigned int) color->green & 0xff00) |
-        ((unsigned int) color->blue >> 8);
-}
-
-static argb_t
-color_to_float (const pixman_color_t *color)
-{
-    argb_t result;
-
-    result.a = pixman_unorm_to_float (color->alpha, 16);
-    result.r = pixman_unorm_to_float (color->red, 16);
-    result.g = pixman_unorm_to_float (color->green, 16);
-    result.b = pixman_unorm_to_float (color->blue, 16);
-
-    return result;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_solid_fill (const pixman_color_t *color)
-{
-    pixman_image_t *img = _pixman_image_allocate ();
-
-    if (!img)
-	return NULL;
-
-    img->type = SOLID;
-    img->solid.color = *color;
-    img->solid.color_32 = color_to_uint32 (color);
-    img->solid.color_float = color_to_float (color);
-
-    return img;
-}
-
diff --git a/vendor/pixman/pixman/pixman-sse2.c b/vendor/pixman/pixman/pixman-sse2.c
deleted file mode 100644
index 60825375f..000000000
--- a/vendor/pixman/pixman/pixman-sse2.c
+++ /dev/null
@@ -1,6528 +0,0 @@
-/*
- * Copyright © 2008 Rodrigo Kumpera
- * Copyright © 2008 André Tupinambá
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Rodrigo Kumpera (kumpera@gmail.com)
- *          André Tupinambá (andrelrt@gmail.com)
- *
- * Based on work by Owen Taylor and Søren Sandmann
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
-#define PSHUFD_IS_FAST 0
-
-#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
-#include <emmintrin.h> /* for SSE2 intrinsics */
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-
-static __m128i mask_0080;
-static __m128i mask_00ff;
-static __m128i mask_0101;
-static __m128i mask_ffff;
-static __m128i mask_ff000000;
-static __m128i mask_alpha;
-
-static __m128i mask_565_r;
-static __m128i mask_565_g1, mask_565_g2;
-static __m128i mask_565_b;
-static __m128i mask_red;
-static __m128i mask_green;
-static __m128i mask_blue;
-
-static __m128i mask_565_fix_rb;
-static __m128i mask_565_fix_g;
-
-static __m128i mask_565_rb;
-static __m128i mask_565_pack_multiplier;
-
-static force_inline __m128i
-unpack_32_1x128 (uint32_t data)
-{
-    return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
-}
-
-static force_inline void
-unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
-{
-    *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
-    *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
-}
-
-static force_inline __m128i
-unpack_565_to_8888 (__m128i lo)
-{
-    __m128i r, g, b, rb, t;
-
-    r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
-    g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
-    b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
-
-    rb = _mm_or_si128 (r, b);
-    t  = _mm_and_si128 (rb, mask_565_fix_rb);
-    t  = _mm_srli_epi32 (t, 5);
-    rb = _mm_or_si128 (rb, t);
-
-    t  = _mm_and_si128 (g, mask_565_fix_g);
-    t  = _mm_srli_epi32 (t, 6);
-    g  = _mm_or_si128 (g, t);
-
-    return _mm_or_si128 (rb, g);
-}
-
-static force_inline void
-unpack_565_128_4x128 (__m128i  data,
-                      __m128i* data0,
-                      __m128i* data1,
-                      __m128i* data2,
-                      __m128i* data3)
-{
-    __m128i lo, hi;
-
-    lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
-    hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
-
-    lo = unpack_565_to_8888 (lo);
-    hi = unpack_565_to_8888 (hi);
-
-    unpack_128_2x128 (lo, data0, data1);
-    unpack_128_2x128 (hi, data2, data3);
-}
-
-static force_inline uint16_t
-pack_565_32_16 (uint32_t pixel)
-{
-    return (uint16_t) (((pixel >> 8) & 0xf800) |
-		       ((pixel >> 5) & 0x07e0) |
-		       ((pixel >> 3) & 0x001f));
-}
-
-static force_inline __m128i
-pack_2x128_128 (__m128i lo, __m128i hi)
-{
-    return _mm_packus_epi16 (lo, hi);
-}
-
-static force_inline __m128i
-pack_565_2packedx128_128 (__m128i lo, __m128i hi)
-{
-    __m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
-    __m128i rb1 = _mm_and_si128 (hi, mask_565_rb);
-
-    __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
-    __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);
-
-    __m128i g0 = _mm_and_si128 (lo, mask_green);
-    __m128i g1 = _mm_and_si128 (hi, mask_green);
-
-    t0 = _mm_or_si128 (t0, g0);
-    t1 = _mm_or_si128 (t1, g1);
-
-    /* Simulates _mm_packus_epi32 */
-    t0 = _mm_slli_epi32 (t0, 16 - 5);
-    t1 = _mm_slli_epi32 (t1, 16 - 5);
-    t0 = _mm_srai_epi32 (t0, 16);
-    t1 = _mm_srai_epi32 (t1, 16);
-    return _mm_packs_epi32 (t0, t1);
-}
-
-static force_inline __m128i
-pack_565_2x128_128 (__m128i lo, __m128i hi)
-{
-    __m128i data;
-    __m128i r, g1, g2, b;
-
-    data = pack_2x128_128 (lo, hi);
-
-    r  = _mm_and_si128 (data, mask_565_r);
-    g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
-    g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
-    b  = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
-
-    return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
-}
-
-static force_inline __m128i
-pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
-{
-    return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
-			     pack_565_2x128_128 (*xmm2, *xmm3));
-}
-
-static force_inline int
-is_opaque (__m128i x)
-{
-    __m128i ffs = _mm_cmpeq_epi8 (x, x);
-
-    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
-}
-
-static force_inline int
-is_zero (__m128i x)
-{
-    return _mm_movemask_epi8 (
-	_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
-}
-
-static force_inline int
-is_transparent (__m128i x)
-{
-    return (_mm_movemask_epi8 (
-		_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
-}
-
-static force_inline __m128i
-expand_pixel_32_1x128 (uint32_t data)
-{
-    return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
-}
-
-static force_inline __m128i
-expand_alpha_1x128 (__m128i data)
-{
-    return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
-						     _MM_SHUFFLE (3, 3, 3, 3)),
-				_MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline void
-expand_alpha_2x128 (__m128i  data_lo,
-                    __m128i  data_hi,
-                    __m128i* alpha_lo,
-                    __m128i* alpha_hi)
-{
-    __m128i lo, hi;
-
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
-
-    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
-    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline void
-expand_alpha_rev_2x128 (__m128i  data_lo,
-                        __m128i  data_hi,
-                        __m128i* alpha_lo,
-                        __m128i* alpha_hi)
-{
-    __m128i lo, hi;
-
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
-    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
-    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline void
-pix_multiply_2x128 (__m128i* data_lo,
-                    __m128i* data_hi,
-                    __m128i* alpha_lo,
-                    __m128i* alpha_hi,
-                    __m128i* ret_lo,
-                    __m128i* ret_hi)
-{
-    __m128i lo, hi;
-
-    lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
-    hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
-    lo = _mm_adds_epu16 (lo, mask_0080);
-    hi = _mm_adds_epu16 (hi, mask_0080);
-    *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
-    *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
-}
-
-static force_inline void
-pix_add_multiply_2x128 (__m128i* src_lo,
-                        __m128i* src_hi,
-                        __m128i* alpha_dst_lo,
-                        __m128i* alpha_dst_hi,
-                        __m128i* dst_lo,
-                        __m128i* dst_hi,
-                        __m128i* alpha_src_lo,
-                        __m128i* alpha_src_hi,
-                        __m128i* ret_lo,
-                        __m128i* ret_hi)
-{
-    __m128i t1_lo, t1_hi;
-    __m128i t2_lo, t2_hi;
-
-    pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
-    pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
-
-    *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
-    *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
-}
-
-static force_inline void
-negate_2x128 (__m128i  data_lo,
-              __m128i  data_hi,
-              __m128i* neg_lo,
-              __m128i* neg_hi)
-{
-    *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
-    *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
-}
-
-static force_inline void
-invert_colors_2x128 (__m128i  data_lo,
-                     __m128i  data_hi,
-                     __m128i* inv_lo,
-                     __m128i* inv_hi)
-{
-    __m128i lo, hi;
-
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
-    *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
-    *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
-static force_inline void
-over_2x128 (__m128i* src_lo,
-            __m128i* src_hi,
-            __m128i* alpha_lo,
-            __m128i* alpha_hi,
-            __m128i* dst_lo,
-            __m128i* dst_hi)
-{
-    __m128i t1, t2;
-
-    negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
-
-    pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
-
-    *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
-    *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
-}
-
-static force_inline void
-over_rev_non_pre_2x128 (__m128i  src_lo,
-                        __m128i  src_hi,
-                        __m128i* dst_lo,
-                        __m128i* dst_hi)
-{
-    __m128i lo, hi;
-    __m128i alpha_lo, alpha_hi;
-
-    expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
-
-    lo = _mm_or_si128 (alpha_lo, mask_alpha);
-    hi = _mm_or_si128 (alpha_hi, mask_alpha);
-
-    invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
-
-    pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
-
-    over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
-}
-
-static force_inline void
-in_over_2x128 (__m128i* src_lo,
-               __m128i* src_hi,
-               __m128i* alpha_lo,
-               __m128i* alpha_hi,
-               __m128i* mask_lo,
-               __m128i* mask_hi,
-               __m128i* dst_lo,
-               __m128i* dst_hi)
-{
-    __m128i s_lo, s_hi;
-    __m128i a_lo, a_hi;
-
-    pix_multiply_2x128 (src_lo,   src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
-    pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
-
-    over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
-}
-
-/* load 4 pixels from a 16-byte boundary aligned address */
-static force_inline __m128i
-load_128_aligned (__m128i* src)
-{
-    return _mm_load_si128 (src);
-}
-
-/* load 4 pixels from a unaligned address */
-static force_inline __m128i
-load_128_unaligned (const __m128i* src)
-{
-    return _mm_loadu_si128 (src);
-}
-
-/* save 4 pixels using Write Combining memory on a 16-byte
- * boundary aligned address
- */
-static force_inline void
-save_128_write_combining (__m128i* dst,
-                          __m128i  data)
-{
-    _mm_stream_si128 (dst, data);
-}
-
-/* save 4 pixels on a 16-byte boundary aligned address */
-static force_inline void
-save_128_aligned (__m128i* dst,
-                  __m128i  data)
-{
-    _mm_store_si128 (dst, data);
-}
-
-/* save 4 pixels on a unaligned address */
-static force_inline void
-save_128_unaligned (__m128i* dst,
-                    __m128i  data)
-{
-    _mm_storeu_si128 (dst, data);
-}
-
-static force_inline __m128i
-load_32_1x128 (uint32_t data)
-{
-    return _mm_cvtsi32_si128 (data);
-}
-
-static force_inline __m128i
-expand_alpha_rev_1x128 (__m128i data)
-{
-    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline __m128i
-expand_pixel_8_1x128 (uint8_t data)
-{
-    return _mm_shufflelo_epi16 (
-	unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline __m128i
-pix_multiply_1x128 (__m128i data,
-		    __m128i alpha)
-{
-    return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
-					    mask_0080),
-			    mask_0101);
-}
-
-static force_inline __m128i
-pix_add_multiply_1x128 (__m128i* src,
-			__m128i* alpha_dst,
-			__m128i* dst,
-			__m128i* alpha_src)
-{
-    __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
-    __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
-
-    return _mm_adds_epu8 (t1, t2);
-}
-
-static force_inline __m128i
-negate_1x128 (__m128i data)
-{
-    return _mm_xor_si128 (data, mask_00ff);
-}
-
-static force_inline __m128i
-invert_colors_1x128 (__m128i data)
-{
-    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
-static force_inline __m128i
-over_1x128 (__m128i src, __m128i alpha, __m128i dst)
-{
-    return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
-}
-
-static force_inline __m128i
-in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
-{
-    return over_1x128 (pix_multiply_1x128 (*src, *mask),
-		       pix_multiply_1x128 (*alpha, *mask),
-		       *dst);
-}
-
-static force_inline __m128i
-over_rev_non_pre_1x128 (__m128i src, __m128i dst)
-{
-    __m128i alpha = expand_alpha_1x128 (src);
-
-    return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
-					   _mm_or_si128 (alpha, mask_alpha)),
-		       alpha,
-		       dst);
-}
-
-static force_inline uint32_t
-pack_1x128_32 (__m128i data)
-{
-    return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
-}
-
-static force_inline __m128i
-expand565_16_1x128 (uint16_t pixel)
-{
-    __m128i m = _mm_cvtsi32_si128 (pixel);
-
-    m = unpack_565_to_8888 (m);
-
-    return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
-}
-
-static force_inline uint32_t
-core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
-{
-    uint8_t a;
-    __m128i xmms;
-
-    a = src >> 24;
-
-    if (a == 0xff)
-    {
-	return src;
-    }
-    else if (src)
-    {
-	xmms = unpack_32_1x128 (src);
-	return pack_1x128_32 (
-	    over_1x128 (xmms, expand_alpha_1x128 (xmms),
-			unpack_32_1x128 (dst)));
-    }
-
-    return dst;
-}
-
-static force_inline uint32_t
-combine1 (const uint32_t *ps, const uint32_t *pm)
-{
-    uint32_t s;
-    memcpy(&s, ps, sizeof(uint32_t));
-
-    if (pm)
-    {
-	__m128i ms, mm;
-
-	mm = unpack_32_1x128 (*pm);
-	mm = expand_alpha_1x128 (mm);
-
-	ms = unpack_32_1x128 (s);
-	ms = pix_multiply_1x128 (ms, mm);
-
-	s = pack_1x128_32 (ms);
-    }
-
-    return s;
-}
-
-static force_inline __m128i
-combine4 (const __m128i *ps, const __m128i *pm)
-{
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_msk_lo, xmm_msk_hi;
-    __m128i s;
-
-    if (pm)
-    {
-	xmm_msk_lo = load_128_unaligned (pm);
-
-	if (is_transparent (xmm_msk_lo))
-	    return _mm_setzero_si128 ();
-    }
-
-    s = load_128_unaligned (ps);
-
-    if (pm)
-    {
-	unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
-
-	expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_msk_lo, &xmm_msk_hi,
-			    &xmm_src_lo, &xmm_src_hi);
-
-	s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
-    }
-
-    return s;
-}
-
-static force_inline void
-core_combine_over_u_sse2_mask (uint32_t *	  pd,
-			       const uint32_t*    ps,
-			       const uint32_t*    pm,
-			       int                w)
-{
-    uint32_t s, d;
-
-    /* Align dst on a 16-byte boundary */
-    while (w && ((uintptr_t)pd & 15))
-    {
-	d = *pd;
-	s = combine1 (ps, pm);
-
-	if (s)
-	    *pd = core_combine_over_u_pixel_sse2 (s, d);
-	pd++;
-	ps++;
-	pm++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i mask = load_128_unaligned ((__m128i *)pm);
-
-	if (!is_zero (mask))
-	{
-	    __m128i src;
-	    __m128i src_hi, src_lo;
-	    __m128i mask_hi, mask_lo;
-	    __m128i alpha_hi, alpha_lo;
-
-	    src = load_128_unaligned ((__m128i *)ps);
-
-	    if (is_opaque (_mm_and_si128 (src, mask)))
-	    {
-		save_128_aligned ((__m128i *)pd, src);
-	    }
-	    else
-	    {
-		__m128i dst = load_128_aligned ((__m128i *)pd);
-		__m128i dst_hi, dst_lo;
-
-		unpack_128_2x128 (mask, &mask_lo, &mask_hi);
-		unpack_128_2x128 (src, &src_lo, &src_hi);
-
-		expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
-		pix_multiply_2x128 (&src_lo, &src_hi,
-				    &mask_lo, &mask_hi,
-				    &src_lo, &src_hi);
-
-		unpack_128_2x128 (dst, &dst_lo, &dst_hi);
-
-		expand_alpha_2x128 (src_lo, src_hi,
-				    &alpha_lo, &alpha_hi);
-
-		over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
-			    &dst_lo, &dst_hi);
-
-		save_128_aligned (
-		    (__m128i *)pd,
-		    pack_2x128_128 (dst_lo, dst_hi));
-	    }
-	}
-
-	pm += 4;
-	ps += 4;
-	pd += 4;
-	w -= 4;
-    }
-    while (w)
-    {
-	d = *pd;
-	s = combine1 (ps, pm);
-
-	if (s)
-	    *pd = core_combine_over_u_pixel_sse2 (s, d);
-	pd++;
-	ps++;
-	pm++;
-
-	w--;
-    }
-}
-
-static force_inline void
-core_combine_over_u_sse2_no_mask (uint32_t *	  pd,
-				  const uint32_t*    ps,
-				  int                w)
-{
-    uint32_t s, d;
-
-    /* Align dst on a 16-byte boundary */
-    while (w && ((uintptr_t)pd & 15))
-    {
-	d = *pd;
-	s = *ps;
-
-	if (s)
-	    *pd = core_combine_over_u_pixel_sse2 (s, d);
-	pd++;
-	ps++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i src;
-	__m128i src_hi, src_lo, dst_hi, dst_lo;
-	__m128i alpha_hi, alpha_lo;
-
-	src = load_128_unaligned ((__m128i *)ps);
-
-	if (!is_zero (src))
-	{
-	    if (is_opaque (src))
-	    {
-		save_128_aligned ((__m128i *)pd, src);
-	    }
-	    else
-	    {
-		__m128i dst = load_128_aligned ((__m128i *)pd);
-
-		unpack_128_2x128 (src, &src_lo, &src_hi);
-		unpack_128_2x128 (dst, &dst_lo, &dst_hi);
-
-		expand_alpha_2x128 (src_lo, src_hi,
-				    &alpha_lo, &alpha_hi);
-		over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
-			    &dst_lo, &dst_hi);
-
-		save_128_aligned (
-		    (__m128i *)pd,
-		    pack_2x128_128 (dst_lo, dst_hi));
-	    }
-	}
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-    }
-    while (w)
-    {
-	d = *pd;
-	s = *ps;
-
-	if (s)
-	    *pd = core_combine_over_u_pixel_sse2 (s, d);
-	pd++;
-	ps++;
-
-	w--;
-    }
-}
-
-static force_inline void
-sse2_combine_over_u (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    if (pm)
-	core_combine_over_u_sse2_mask (pd, ps, pm, w);
-    else
-	core_combine_over_u_sse2_no_mask (pd, ps, w);
-}
-
-static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               pd,
-                             const uint32_t *         ps,
-                             const uint32_t *         pm,
-                             int                      w)
-{
-    uint32_t s, d;
-
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-
-    /* Align dst on a 16-byte boundary */
-    while (w &&
-           ((uintptr_t)pd & 15))
-    {
-	d = *pd;
-	s = combine1 (ps, pm);
-
-	*pd++ = core_combine_over_u_pixel_sse2 (d, s);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	/* I'm loading unaligned because I'm not sure
-	 * about the address alignment.
-	 */
-	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-
-	over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-		    &xmm_alpha_lo, &xmm_alpha_hi,
-		    &xmm_src_lo, &xmm_src_hi);
-
-	/* rebuid the 4 pixel data and save*/
-	save_128_aligned ((__m128i*)pd,
-			  pack_2x128_128 (xmm_src_lo, xmm_src_hi));
-
-	w -= 4;
-	ps += 4;
-	pd += 4;
-
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	d = *pd;
-	s = combine1 (ps, pm);
-
-	*pd++ = core_combine_over_u_pixel_sse2 (d, s);
-	ps++;
-	w--;
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline uint32_t
-core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
-{
-    uint32_t maska = src >> 24;
-
-    if (maska == 0)
-    {
-	return 0;
-    }
-    else if (maska != 0xff)
-    {
-	return pack_1x128_32 (
-	    pix_multiply_1x128 (unpack_32_1x128 (dst),
-				expand_alpha_1x128 (unpack_32_1x128 (src))));
-    }
-
-    return dst;
-}
-
-static void
-sse2_combine_in_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               pd,
-                   const uint32_t *         ps,
-                   const uint32_t *         pm,
-                   int                      w)
-{
-    uint32_t s, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-
-    while (w && ((uintptr_t)pd & 15))
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_in_u_pixel_sse2 (d, s);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned ((__m128i*)pd,
-			  pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_in_u_pixel_sse2 (d, s);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               pd,
-                           const uint32_t *         ps,
-                           const uint32_t *         pm,
-                           int                      w)
-{
-    uint32_t s, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-
-    while (w && ((uintptr_t)pd & 15))
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_in_u_pixel_sse2 (s, d);
-	ps++;
-	w--;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_src_lo, &xmm_src_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_in_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               pd,
-                            const uint32_t *         ps,
-                            const uint32_t *         pm,
-                            int                      w)
-{
-    while (w && ((uintptr_t)pd & 15))
-    {
-	uint32_t s = combine1 (ps, pm);
-	uint32_t d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d), negate_1x128 (
-		    expand_alpha_1x128 (unpack_32_1x128 (s)))));
-
-	if (pm)
-	    pm++;
-	ps++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i xmm_src_lo, xmm_src_hi;
-	__m128i xmm_dst_lo, xmm_dst_hi;
-
-	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	negate_2x128       (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_src_lo, &xmm_src_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	if (pm)
-	    pm += 4;
-
-	w -= 4;
-    }
-
-    while (w)
-    {
-	uint32_t s = combine1 (ps, pm);
-	uint32_t d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d), negate_1x128 (
-		    expand_alpha_1x128 (unpack_32_1x128 (s)))));
-	ps++;
-	if (pm)
-	    pm++;
-	w--;
-    }
-}
-
-static void
-sse2_combine_out_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               pd,
-                    const uint32_t *         ps,
-                    const uint32_t *         pm,
-                    int                      w)
-{
-    while (w && ((uintptr_t)pd & 15))
-    {
-	uint32_t s = combine1 (ps, pm);
-	uint32_t d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (s), negate_1x128 (
-		    expand_alpha_1x128 (unpack_32_1x128 (d)))));
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	__m128i xmm_src_lo, xmm_src_hi;
-	__m128i xmm_dst_lo, xmm_dst_hi;
-
-	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	negate_2x128       (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	uint32_t s = combine1 (ps, pm);
-	uint32_t d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (s), negate_1x128 (
-		    expand_alpha_1x128 (unpack_32_1x128 (d)))));
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline uint32_t
-core_combine_atop_u_pixel_sse2 (uint32_t src,
-                                uint32_t dst)
-{
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-
-    __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
-    __m128i da = expand_alpha_1x128 (d);
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
-}
-
-static void
-sse2_combine_atop_u (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    uint32_t s, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
-    while (w && ((uintptr_t)pd & 15))
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
-		      &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline uint32_t
-core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
-                                        uint32_t dst)
-{
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-
-    __m128i sa = expand_alpha_1x128 (s);
-    __m128i da = negate_1x128 (expand_alpha_1x128 (d));
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
-}
-
-static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               pd,
-                             const uint32_t *         ps,
-                             const uint32_t *         pm,
-                             int                      w)
-{
-    uint32_t s, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
-    while (w && ((uintptr_t)pd & 15))
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
-	ps++;
-	w--;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
-		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
-	ps++;
-	w--;
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline uint32_t
-core_combine_xor_u_pixel_sse2 (uint32_t src,
-                               uint32_t dst)
-{
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-
-    __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
-    __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
-}
-
-static void
-sse2_combine_xor_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int w = width;
-    uint32_t s, d;
-    uint32_t* pd = dst;
-    const uint32_t* ps = src;
-    const uint32_t* pm = mask;
-
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
-    while (w && ((uintptr_t)pd & 15))
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
-	xmm_dst = load_128_aligned ((__m128i*) pd);
-
-	unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
-		      &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
-		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	w -= 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline void
-sse2_combine_add_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int w = width;
-    uint32_t s, d;
-    uint32_t* pd = dst;
-    const uint32_t* ps = src;
-    const uint32_t* pm = mask;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	ps++;
-	if (pm)
-	    pm++;
-	*pd++ = _mm_cvtsi128_si32 (
-	    _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i s;
-
-	s = combine4 ((__m128i*)ps, (__m128i*)pm);
-
-	save_128_aligned (
-	    (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned  ((__m128i*)pd)));
-
-	pd += 4;
-	ps += 4;
-	if (pm)
-	    pm += 4;
-	w -= 4;
-    }
-
-    while (w--)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	ps++;
-	*pd++ = _mm_cvtsi128_si32 (
-	    _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
-	if (pm)
-	    pm++;
-    }
-}
-
-static force_inline uint32_t
-core_combine_saturate_u_pixel_sse2 (uint32_t src,
-                                    uint32_t dst)
-{
-    __m128i ms = unpack_32_1x128 (src);
-    __m128i md = unpack_32_1x128 (dst);
-    uint32_t sa = src >> 24;
-    uint32_t da = ~dst >> 24;
-
-    if (sa > da)
-    {
-	ms = pix_multiply_1x128 (
-	    ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
-    }
-
-    return pack_1x128_32 (_mm_adds_epu16 (md, ms));
-}
-
-static void
-sse2_combine_saturate_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *               pd,
-                         const uint32_t *         ps,
-                         const uint32_t *         pm,
-                         int                      w)
-{
-    uint32_t s, d;
-
-    uint32_t pack_cmp;
-    __m128i xmm_src, xmm_dst;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	w--;
-	ps++;
-	if (pm)
-	    pm++;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst = load_128_aligned  ((__m128i*)pd);
-	xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
-
-	pack_cmp = _mm_movemask_epi8 (
-	    _mm_cmpgt_epi32 (
-		_mm_srli_epi32 (xmm_src, 24),
-		_mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
-
-	/* if some alpha src is grater than respective ~alpha dst */
-	if (pack_cmp)
-	{
-	    s = combine1 (ps++, pm);
-	    d = *pd;
-	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	    if (pm)
-		pm++;
-
-	    s = combine1 (ps++, pm);
-	    d = *pd;
-	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	    if (pm)
-		pm++;
-
-	    s = combine1 (ps++, pm);
-	    d = *pd;
-	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	    if (pm)
-		pm++;
-
-	    s = combine1 (ps++, pm);
-	    d = *pd;
-	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	    if (pm)
-		pm++;
-	}
-	else
-	{
-	    save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
-
-	    pd += 4;
-	    ps += 4;
-	    if (pm)
-		pm += 4;
-	}
-
-	w -= 4;
-    }
-
-    while (w--)
-    {
-	s = combine1 (ps, pm);
-	d = *pd;
-
-	*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-	ps++;
-	if (pm)
-	    pm++;
-    }
-}
-
-static void
-sse2_combine_src_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    uint32_t s, m;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
-	w--;
-    }
-}
-
-static force_inline uint32_t
-core_combine_over_ca_pixel_sse2 (uint32_t src,
-                                 uint32_t mask,
-                                 uint32_t dst)
-{
-    __m128i s = unpack_32_1x128 (src);
-    __m128i expAlpha = expand_alpha_1x128 (s);
-    __m128i unpk_mask = unpack_32_1x128 (mask);
-    __m128i unpk_dst  = unpack_32_1x128 (dst);
-
-    return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
-}
-
-static void
-sse2_combine_over_ca (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               pd,
-                      const uint32_t *         ps,
-                      const uint32_t *         pm,
-                      int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-
-	in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-		       &xmm_alpha_lo, &xmm_alpha_hi,
-		       &xmm_mask_lo, &xmm_mask_hi,
-		       &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-}
-
-static force_inline uint32_t
-core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
-                                         uint32_t mask,
-                                         uint32_t dst)
-{
-    __m128i d = unpack_32_1x128 (dst);
-
-    return pack_1x128_32 (
-	over_1x128 (d, expand_alpha_1x128 (d),
-		    pix_multiply_1x128 (unpack_32_1x128 (src),
-					unpack_32_1x128 (mask))));
-}
-
-static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              uint32_t *               pd,
-                              const uint32_t *         ps,
-                              const uint32_t *         pm,
-                              int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_mask_lo, &xmm_mask_hi);
-
-	over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-		    &xmm_alpha_lo, &xmm_alpha_hi,
-		    &xmm_mask_lo, &xmm_mask_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-}
-
-static void
-sse2_combine_in_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               pd,
-                    const uint32_t *         ps,
-                    const uint32_t *         pm,
-                    int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
-		expand_alpha_1x128 (unpack_32_1x128 (d))));
-
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		pix_multiply_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
-		expand_alpha_1x128 (unpack_32_1x128 (d))));
-
-	w--;
-    }
-}
-
-static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               pd,
-                            const uint32_t *         ps,
-                            const uint32_t *         pm,
-                            int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d),
-		pix_multiply_1x128 (unpack_32_1x128 (m),
-				   expand_alpha_1x128 (unpack_32_1x128 (s)))));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d),
-		pix_multiply_1x128 (unpack_32_1x128 (m),
-				   expand_alpha_1x128 (unpack_32_1x128 (s)))));
-	w--;
-    }
-}
-
-static void
-sse2_combine_out_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		pix_multiply_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
-		negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-	negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
-		      &xmm_alpha_lo, &xmm_alpha_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		pix_multiply_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
-		negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
-
-	w--;
-    }
-}
-
-static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               pd,
-                             const uint32_t *         ps,
-                             const uint32_t *         pm,
-                             int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d),
-		negate_1x128 (pix_multiply_1x128 (
-				 unpack_32_1x128 (m),
-				 expand_alpha_1x128 (unpack_32_1x128 (s))))));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi);
-
-	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_mask_lo, &xmm_mask_hi);
-
-	negate_2x128 (xmm_mask_lo, xmm_mask_hi,
-		      &xmm_mask_lo, &xmm_mask_hi);
-
-	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    pix_multiply_1x128 (
-		unpack_32_1x128 (d),
-		negate_1x128 (pix_multiply_1x128 (
-				 unpack_32_1x128 (m),
-				 expand_alpha_1x128 (unpack_32_1x128 (s))))));
-	w--;
-    }
-}
-
-static force_inline uint32_t
-core_combine_atop_ca_pixel_sse2 (uint32_t src,
-                                 uint32_t mask,
-                                 uint32_t dst)
-{
-    __m128i m = unpack_32_1x128 (mask);
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-    __m128i sa = expand_alpha_1x128 (s);
-    __m128i da = expand_alpha_1x128 (d);
-
-    s = pix_multiply_1x128 (s, m);
-    m = negate_1x128 (pix_multiply_1x128 (m, sa));
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
-}
-
-static void
-sse2_combine_atop_ca (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               pd,
-                      const uint32_t *         ps,
-                      const uint32_t *         pm,
-                      int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_src_lo, &xmm_src_hi);
-	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi);
-
-	negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-}
-
-static force_inline uint32_t
-core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
-                                         uint32_t mask,
-                                         uint32_t dst)
-{
-    __m128i m = unpack_32_1x128 (mask);
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-
-    __m128i da = negate_1x128 (expand_alpha_1x128 (d));
-    __m128i sa = expand_alpha_1x128 (s);
-
-    s = pix_multiply_1x128 (s, m);
-    m = pix_multiply_1x128 (m, sa);
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
-}
-
-static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              uint32_t *               pd,
-                              const uint32_t *         ps,
-                              const uint32_t *         pm,
-                              int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_src_lo, &xmm_src_hi);
-	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi);
-
-	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
-		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-}
-
-static force_inline uint32_t
-core_combine_xor_ca_pixel_sse2 (uint32_t src,
-                                uint32_t mask,
-                                uint32_t dst)
-{
-    __m128i a = unpack_32_1x128 (mask);
-    __m128i s = unpack_32_1x128 (src);
-    __m128i d = unpack_32_1x128 (dst);
-
-    __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
-				       a, expand_alpha_1x128 (s)));
-    __m128i dest      = pix_multiply_1x128 (s, a);
-    __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
-
-    return pack_1x128_32 (pix_add_multiply_1x128 (&d,
-                                                &alpha_dst,
-                                                &dest,
-                                                &alpha_src));
-}
-
-static void
-sse2_combine_xor_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
-    __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
-			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_src_lo, &xmm_src_hi);
-	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi);
-
-	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
-		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-	negate_2x128 (xmm_mask_lo, xmm_mask_hi,
-		      &xmm_mask_lo, &xmm_mask_hi);
-
-	pix_add_multiply_2x128 (
-	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-	    &xmm_dst_lo, &xmm_dst_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
-	w--;
-    }
-}
-
-static void
-sse2_combine_add_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               pd,
-                     const uint32_t *         ps,
-                     const uint32_t *         pm,
-                     int                      w)
-{
-    uint32_t s, m, d;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask_lo, xmm_mask_hi;
-
-    while (w && (uintptr_t)pd & 15)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
-					       unpack_32_1x128 (m)),
-			   unpack_32_1x128 (d)));
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-
-	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-			    &xmm_mask_lo, &xmm_mask_hi,
-			    &xmm_src_lo, &xmm_src_hi);
-
-	save_128_aligned (
-	    (__m128i*)pd, pack_2x128_128 (
-		_mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
-		_mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
-
-	ps += 4;
-	pd += 4;
-	pm += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	s = *ps++;
-	m = *pm++;
-	d = *pd;
-
-	*pd++ = pack_1x128_32 (
-	    _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
-					       unpack_32_1x128 (m)),
-			   unpack_32_1x128 (d)));
-	w--;
-    }
-}
-
-static force_inline __m128i
-create_mask_16_128 (uint16_t mask)
-{
-    return _mm_set1_epi16 (mask);
-}
-
-/* Work around a code generation bug in Sun Studio 12. */
-#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
-# define create_mask_2x32_128(mask0, mask1)				\
-    (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
-#else
-static force_inline __m128i
-create_mask_2x32_128 (uint32_t mask0,
-                      uint32_t mask1)
-{
-    return _mm_set_epi32 (mask0, mask1, mask0, mask1);
-}
-#endif
-
-static void
-sse2_composite_over_n_8888 (pixman_implementation_t *imp,
-                            pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, *dst, d;
-    int32_t w;
-    int dst_stride;
-    __m128i xmm_src, xmm_alpha;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    xmm_src = expand_pixel_32_1x128 (src);
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-
-    while (height--)
-    {
-	dst = dst_line;
-
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    d = *dst;
-	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
-						xmm_alpha,
-						unpack_32_1x128 (d)));
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	    over_2x128 (&xmm_src, &xmm_src,
-			&xmm_alpha, &xmm_alpha,
-			&xmm_dst_lo, &xmm_dst_hi);
-
-	    /* rebuid the 4 pixel data and save*/
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    w -= 4;
-	    dst += 4;
-	}
-
-	while (w)
-	{
-	    d = *dst;
-	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
-						xmm_alpha,
-						unpack_32_1x128 (d)));
-	    w--;
-	}
-
-    }
-}
-
-static void
-sse2_composite_over_n_0565 (pixman_implementation_t *imp,
-                            pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint16_t    *dst_line, *dst, d;
-    int32_t w;
-    int dst_stride;
-    __m128i xmm_src, xmm_alpha;
-    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    xmm_src = expand_pixel_32_1x128 (src);
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-
-    while (height--)
-    {
-	dst = dst_line;
-
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    d = *dst;
-
-	    *dst++ = pack_565_32_16 (
-		pack_1x128_32 (over_1x128 (xmm_src,
-					   xmm_alpha,
-					   expand565_16_1x128 (d))));
-	    w--;
-	}
-
-	while (w >= 8)
-	{
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_565_128_4x128 (xmm_dst,
-				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
-	    over_2x128 (&xmm_src, &xmm_src,
-			&xmm_alpha, &xmm_alpha,
-			&xmm_dst0, &xmm_dst1);
-	    over_2x128 (&xmm_src, &xmm_src,
-			&xmm_alpha, &xmm_alpha,
-			&xmm_dst2, &xmm_dst3);
-
-	    xmm_dst = pack_565_4x128_128 (
-		&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
-	    save_128_aligned ((__m128i*)dst, xmm_dst);
-
-	    dst += 8;
-	    w -= 8;
-	}
-
-	while (w--)
-	{
-	    d = *dst;
-	    *dst++ = pack_565_32_16 (
-		pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
-					   expand565_16_1x128 (d))));
-	}
-    }
-
-}
-
-static void
-sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
-				   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, d;
-    uint32_t    *mask_line, m;
-    uint32_t pack_cmp;
-    int dst_stride, mask_stride;
-
-    __m128i xmm_src;
-    __m128i xmm_dst;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    __m128i mmx_src, mmx_mask, mmx_dest;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    xmm_src = _mm_unpacklo_epi8 (
-	create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
-    mmx_src   = xmm_src;
-
-    while (height--)
-    {
-	int w = width;
-	const uint32_t *pm = (uint32_t *)mask_line;
-	uint32_t *pd = (uint32_t *)dst_line;
-
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-
-	while (w && (uintptr_t)pd & 15)
-	{
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*pd = pack_1x128_32 (
-		    _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
-				   mmx_dest));
-	    }
-
-	    pd++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_mask = load_128_unaligned ((__m128i*)pm);
-
-	    pack_cmp =
-		_mm_movemask_epi8 (
-		    _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-	    /* if all bits in mask are zero, pack_cmp are equal to 0xffff */
-	    if (pack_cmp != 0xffff)
-	    {
-		xmm_dst = load_128_aligned ((__m128i*)pd);
-
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-		pix_multiply_2x128 (&xmm_src, &xmm_src,
-				    &xmm_mask_lo, &xmm_mask_hi,
-				    &xmm_mask_lo, &xmm_mask_hi);
-		xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
-
-		save_128_aligned (
-		    (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
-	    }
-
-	    pd += 4;
-	    pm += 4;
-	    w -= 4;
-	}
-
-	while (w)
-	{
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*pd = pack_1x128_32 (
-		    _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
-				   mmx_dest));
-	    }
-
-	    pd++;
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-                                    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, d;
-    uint32_t    *mask_line, m;
-    uint32_t pack_cmp;
-    int dst_stride, mask_stride;
-
-    __m128i xmm_src, xmm_alpha;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    xmm_src = _mm_unpacklo_epi8 (
-	create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src   = xmm_src;
-    mmx_alpha = xmm_alpha;
-
-    while (height--)
-    {
-	int w = width;
-	const uint32_t *pm = (uint32_t *)mask_line;
-	uint32_t *pd = (uint32_t *)dst_line;
-
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-
-	while (w && (uintptr_t)pd & 15)
-	{
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
-		                                  &mmx_alpha,
-		                                  &mmx_mask,
-		                                  &mmx_dest));
-	    }
-
-	    pd++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_mask = load_128_unaligned ((__m128i*)pm);
-
-	    pack_cmp =
-		_mm_movemask_epi8 (
-		    _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-	    /* if all bits in mask are zero, pack_cmp are equal to 0xffff */
-	    if (pack_cmp != 0xffff)
-	    {
-		xmm_dst = load_128_aligned ((__m128i*)pd);
-
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned (
-		    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-
-	    pd += 4;
-	    pm += 4;
-	    w -= 4;
-	}
-
-	while (w)
-	{
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*pd = pack_1x128_32 (
-		    in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
-	    }
-
-	    pd++;
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    uint32_t mask;
-    int32_t w;
-    int dst_stride, src_stride;
-
-    __m128i xmm_mask;
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
-
-    xmm_mask = create_mask_16_128 (mask >> 24);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    uint32_t s = *src++;
-
-	    if (s)
-	    {
-		uint32_t d = *dst;
-		
-		__m128i ms = unpack_32_1x128 (s);
-		__m128i alpha    = expand_alpha_1x128 (ms);
-		__m128i dest     = xmm_mask;
-		__m128i alpha_dst = unpack_32_1x128 (d);
-		
-		*dst = pack_1x128_32 (
-		    in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
-	    }
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_src = load_128_unaligned ((__m128i*)src);
-
-	    if (!is_zero (xmm_src))
-	    {
-		xmm_dst = load_128_aligned ((__m128i*)dst);
-		
-		unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-		expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-				    &xmm_alpha_lo, &xmm_alpha_hi);
-		
-		in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			       &xmm_alpha_lo, &xmm_alpha_hi,
-			       &xmm_mask, &xmm_mask,
-			       &xmm_dst_lo, &xmm_dst_hi);
-		
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-		
-	    dst += 4;
-	    src += 4;
-	    w -= 4;
-	}
-
-	while (w)
-	{
-	    uint32_t s = *src++;
-
-	    if (s)
-	    {
-		uint32_t d = *dst;
-		
-		__m128i ms = unpack_32_1x128 (s);
-		__m128i alpha = expand_alpha_1x128 (ms);
-		__m128i mask  = xmm_mask;
-		__m128i dest  = unpack_32_1x128 (d);
-		
-		*dst = pack_1x128_32 (
-		    in_over_1x128 (&ms, &alpha, &mask, &dest));
-	    }
-
-	    dst++;
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    s = *src++;
-	    *dst = convert_8888_to_0565 (s);
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 8)
-	{
-	    __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0);
-	    __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1);
-
-	    save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1));
-
-	    w -= 8;
-	    src += 8;
-	    dst += 8;
-	}
-
-	while (w)
-	{
-	    s = *src++;
-	    *dst = convert_8888_to_0565 (s);
-	    dst++;
-	    w--;
-	}
-    }
-}
-
-static void
-sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
-			      pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int32_t w;
-    int dst_stride, src_stride;
-
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    *dst++ = *src++ | 0xff000000;
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
-	    
-	    xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
-	    xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
-	    xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
-	    xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
-	    
-	    save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
-	    save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
-	    save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
-	    save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
-	    
-	    dst += 16;
-	    src += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    *dst++ = *src++ | 0xff000000;
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    uint32_t mask;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    __m128i xmm_mask, xmm_alpha;
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
-
-    xmm_mask = create_mask_16_128 (mask >> 24);
-    xmm_alpha = mask_00ff;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    uint32_t s = (*src++) | 0xff000000;
-	    uint32_t d = *dst;
-
-	    __m128i src   = unpack_32_1x128 (s);
-	    __m128i alpha = xmm_alpha;
-	    __m128i mask  = xmm_mask;
-	    __m128i dest  = unpack_32_1x128 (d);
-
-	    *dst++ = pack_1x128_32 (
-		in_over_1x128 (&src, &alpha, &mask, &dest));
-
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_src = _mm_or_si128 (
-		load_128_unaligned ((__m128i*)src), mask_ff000000);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			   &xmm_alpha, &xmm_alpha,
-			   &xmm_mask, &xmm_mask,
-			   &xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    dst += 4;
-	    src += 4;
-	    w -= 4;
-
-	}
-
-	while (w)
-	{
-	    uint32_t s = (*src++) | 0xff000000;
-	    uint32_t d = *dst;
-
-	    __m128i src  = unpack_32_1x128 (s);
-	    __m128i alpha = xmm_alpha;
-	    __m128i mask  = xmm_mask;
-	    __m128i dest  = unpack_32_1x128 (d);
-
-	    *dst++ = pack_1x128_32 (
-		in_over_1x128 (&src, &alpha, &mask, &dest));
-
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
-                               pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    int dst_stride, src_stride;
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    dst = dst_line;
-    src = src_line;
-
-    while (height--)
-    {
-	sse2_combine_over_u (imp, op, dst, src, NULL, width);
-
-	dst += dst_stride;
-	src += src_stride;
-    }
-}
-
-static force_inline uint16_t
-composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
-{
-    __m128i ms;
-
-    ms = unpack_32_1x128 (src);
-    return pack_565_32_16 (
-	pack_1x128_32 (
-	    over_1x128 (
-		ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
-}
-
-static void
-sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
-                               pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst, d;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	src = src_line;
-
-	dst_line += dst_stride;
-	src_line += src_stride;
-	w = width;
-
-	/* Align dst on a 16-byte boundary */
-	while (w &&
-	       ((uintptr_t)dst & 15))
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    *dst++ = composite_over_8888_0565pixel (s, d);
-	    w--;
-	}
-
-	/* It's a 8 pixel loop */
-	while (w >= 8)
-	{
-	    /* I'm loading unaligned because I'm not sure
-	     * about the address alignment.
-	     */
-	    xmm_src = load_128_unaligned ((__m128i*) src);
-	    xmm_dst = load_128_aligned ((__m128i*) dst);
-
-	    /* Unpacking */
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_565_128_4x128 (xmm_dst,
-				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-				&xmm_alpha_lo, &xmm_alpha_hi);
-
-	    /* I'm loading next 4 pixels from memory
-	     * before to optimze the memory read.
-	     */
-	    xmm_src = load_128_unaligned ((__m128i*) (src + 4));
-
-	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			&xmm_alpha_lo, &xmm_alpha_hi,
-			&xmm_dst0, &xmm_dst1);
-
-	    /* Unpacking */
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-				&xmm_alpha_lo, &xmm_alpha_hi);
-
-	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			&xmm_alpha_lo, &xmm_alpha_hi,
-			&xmm_dst2, &xmm_dst3);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_565_4x128_128 (
-		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-	    w -= 8;
-	    dst += 8;
-	    src += 8;
-	}
-
-	while (w--)
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    *dst++ = composite_over_8888_0565pixel (s, d);
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t d;
-
-    __m128i xmm_src, xmm_alpha, xmm_def;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    xmm_def = create_mask_2x32_128 (src, src);
-    xmm_src = expand_pixel_32_1x128 (src);
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src   = xmm_src;
-    mmx_alpha = xmm_alpha;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = expand_pixel_8_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
-		                                   &mmx_alpha,
-		                                   &mmx_mask,
-		                                   &mmx_dest));
-	    }
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-            uint32_t m;
-            memcpy(&m, mask, sizeof(uint32_t));
-
-	    if (srca == 0xff && m == 0xffffffff)
-	    {
-		save_128_aligned ((__m128i*)dst, xmm_def);
-	    }
-	    else if (m)
-	    {
-		xmm_dst = load_128_aligned ((__m128i*) dst);
-		xmm_mask = unpack_32_1x128 (m);
-		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
-		/* Unpacking */
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
-					&xmm_mask_lo, &xmm_mask_hi);
-
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    mask += 4;
-	}
-
-	while (w)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = expand_pixel_8_1x128 (m);
-		mmx_dest = unpack_32_1x128 (d);
-
-		*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
-		                                   &mmx_alpha,
-		                                   &mmx_mask,
-		                                   &mmx_dest));
-	    }
-
-	    w--;
-	    dst++;
-	}
-    }
-
-}
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-static pixman_bool_t
-sse2_fill (pixman_implementation_t *imp,
-           uint32_t *               bits,
-           int                      stride,
-           int                      bpp,
-           int                      x,
-           int                      y,
-           int                      width,
-           int                      height,
-           uint32_t		    filler)
-{
-    uint32_t byte_width;
-    uint8_t *byte_line;
-
-    __m128i xmm_def;
-
-    if (bpp == 8)
-    {
-	uint32_t b;
-	uint32_t w;
-
-	stride = stride * (int) sizeof (uint32_t) / 1;
-	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
-	byte_width = width;
-	stride *= 1;
-
-	b = filler & 0xff;
-	w = (b << 8) | b;
-	filler = (w << 16) | w;
-    }
-    else if (bpp == 16)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 2;
-	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-	byte_width = 2 * width;
-	stride *= 2;
-
-        filler = (filler & 0xffff) * 0x00010001;
-    }
-    else if (bpp == 32)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 4;
-	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-	byte_width = 4 * width;
-	stride *= 4;
-    }
-    else
-    {
-	return FALSE;
-    }
-
-    xmm_def = create_mask_2x32_128 (filler, filler);
-
-    while (height--)
-    {
-	int w;
-	uint8_t *d = byte_line;
-	byte_line += stride;
-	w = byte_width;
-
-	if (w >= 1 && ((uintptr_t)d & 1))
-	{
-	    *(uint8_t *)d = filler;
-	    w -= 1;
-	    d += 1;
-	}
-
-	while (w >= 2 && ((uintptr_t)d & 3))
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((uintptr_t)d & 15))
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	while (w >= 128)
-	{
-	    save_128_aligned ((__m128i*)(d),     xmm_def);
-	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 32),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 48),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 64),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 80),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 96),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 112), xmm_def);
-
-	    d += 128;
-	    w -= 128;
-	}
-
-	if (w >= 64)
-	{
-	    save_128_aligned ((__m128i*)(d),     xmm_def);
-	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 32),  xmm_def);
-	    save_128_aligned ((__m128i*)(d + 48),  xmm_def);
-
-	    d += 64;
-	    w -= 64;
-	}
-
-	if (w >= 32)
-	{
-	    save_128_aligned ((__m128i*)(d),     xmm_def);
-	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
-
-	    d += 32;
-	    w -= 32;
-	}
-
-	if (w >= 16)
-	{
-	    save_128_aligned ((__m128i*)(d),     xmm_def);
-
-	    d += 16;
-	    w -= 16;
-	}
-
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	if (w >= 2)
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-
-	if (w >= 1)
-	{
-	    *(uint8_t *)d = filler;
-	    w -= 1;
-	    d += 1;
-	}
-    }
-
-    return TRUE;
-}
-
-static void
-sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t    *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-
-    __m128i xmm_src, xmm_def;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-    {
-	sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
-		   PIXMAN_FORMAT_BPP (dest_image->bits.format),
-		   dest_x, dest_y, width, height, 0);
-	return;
-    }
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    xmm_def = create_mask_2x32_128 (src, src);
-    xmm_src = expand_pixel_32_1x128 (src);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		*dst = pack_1x128_32 (
-		    pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-            uint32_t m;
-            memcpy(&m, mask, sizeof(uint32_t));
-
-	    if (srca == 0xff && m == 0xffffffff)
-	    {
-		save_128_aligned ((__m128i*)dst, xmm_def);
-	    }
-	    else if (m)
-	    {
-		xmm_mask = unpack_32_1x128 (m);
-		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
-		/* Unpacking */
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
-					&xmm_mask_lo, &xmm_mask_hi);
-
-		pix_multiply_2x128 (&xmm_src, &xmm_src,
-				    &xmm_mask_lo, &xmm_mask_hi,
-				    &xmm_mask_lo, &xmm_mask_hi);
-
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
-	    }
-	    else
-	    {
-		save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    mask += 4;
-	}
-
-	while (w)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		*dst = pack_1x128_32 (
-		    pix_multiply_1x128 (
-			xmm_src, expand_pixel_8_1x128 (m)));
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-
-	    w--;
-	    dst++;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint16_t    *dst_line, *dst, d;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
-    __m128i xmm_src, xmm_alpha;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    xmm_src = expand_pixel_32_1x128 (src);
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src = xmm_src;
-    mmx_alpha = xmm_alpha;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
-		mmx_dest = expand565_16_1x128 (d);
-
-		*dst = pack_565_32_16 (
-		    pack_1x128_32 (
-			in_over_1x128 (
-			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
-	    }
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 8)
-	{
-            uint32_t m;
-
-	    xmm_dst = load_128_aligned ((__m128i*) dst);
-	    unpack_565_128_4x128 (xmm_dst,
-				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
-            memcpy(&m, mask, sizeof(uint32_t));
-	    mask += 4;
-
-	    if (m)
-	    {
-		xmm_mask = unpack_32_1x128 (m);
-		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
-		/* Unpacking */
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
-					&xmm_mask_lo, &xmm_mask_hi);
-
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst0, &xmm_dst1);
-	    }
-
-            memcpy(&m, mask, sizeof(uint32_t));
-	    mask += 4;
-
-	    if (m)
-	    {
-		xmm_mask = unpack_32_1x128 (m);
-		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
-		/* Unpacking */
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
-					&xmm_mask_lo, &xmm_mask_hi);
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst2, &xmm_dst3);
-	    }
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_565_4x128_128 (
-		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-	    w -= 8;
-	    dst += 8;
-	}
-
-	while (w)
-	{
-	    uint8_t m = *mask++;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
-		mmx_dest = expand565_16_1x128 (d);
-
-		*dst = pack_565_32_16 (
-		    pack_1x128_32 (
-			in_over_1x128 (
-			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
-	    }
-
-	    w--;
-	    dst++;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst, d;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint32_t opaque, zero;
-
-    __m128i ms;
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    ms = unpack_32_1x128 (s);
-
-	    *dst++ = pack_565_32_16 (
-		pack_1x128_32 (
-		    over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
-	    w--;
-	}
-
-	while (w >= 8)
-	{
-	    /* First round */
-	    xmm_src = load_128_unaligned ((__m128i*)src);
-	    xmm_dst = load_128_aligned  ((__m128i*)dst);
-
-	    opaque = is_opaque (xmm_src);
-	    zero = is_zero (xmm_src);
-
-	    unpack_565_128_4x128 (xmm_dst,
-				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
-	    /* preload next round*/
-	    xmm_src = load_128_unaligned ((__m128i*)(src + 4));
-
-	    if (opaque)
-	    {
-		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
-				     &xmm_dst0, &xmm_dst1);
-	    }
-	    else if (!zero)
-	    {
-		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
-					&xmm_dst0, &xmm_dst1);
-	    }
-
-	    /* Second round */
-	    opaque = is_opaque (xmm_src);
-	    zero = is_zero (xmm_src);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
-	    if (opaque)
-	    {
-		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
-				     &xmm_dst2, &xmm_dst3);
-	    }
-	    else if (!zero)
-	    {
-		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
-					&xmm_dst2, &xmm_dst3);
-	    }
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_565_4x128_128 (
-		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-	    w -= 8;
-	    src += 8;
-	    dst += 8;
-	}
-
-	while (w)
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    ms = unpack_32_1x128 (s);
-
-	    *dst++ = pack_565_32_16 (
-		pack_1x128_32 (
-		    over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst, d;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint32_t opaque, zero;
-
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    *dst++ = pack_1x128_32 (
-		over_rev_non_pre_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
-
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    xmm_src_hi = load_128_unaligned ((__m128i*)src);
-
-	    opaque = is_opaque (xmm_src_hi);
-	    zero = is_zero (xmm_src_hi);
-
-	    unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
-	    if (opaque)
-	    {
-		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
-				     &xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-	    else if (!zero)
-	    {
-		xmm_dst_hi = load_128_aligned  ((__m128i*)dst);
-
-		unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
-					&xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	while (w)
-	{
-	    s = *src++;
-	    d = *dst;
-
-	    *dst++ = pack_1x128_32 (
-		over_rev_non_pre_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
-
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-                                    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint16_t    *dst_line, *dst, d;
-    uint32_t    *mask_line, *mask, m;
-    int dst_stride, mask_stride;
-    int w;
-    uint32_t pack_cmp;
-
-    __m128i xmm_src, xmm_alpha;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
-    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    xmm_src = expand_pixel_32_1x128 (src);
-    xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src = xmm_src;
-    mmx_alpha = xmm_alpha;
-
-    while (height--)
-    {
-	w = width;
-	mask = mask_line;
-	dst = dst_line;
-	mask_line += mask_stride;
-	dst_line += dst_stride;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    m = *(uint32_t *) mask;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = expand565_16_1x128 (d);
-
-		*dst = pack_565_32_16 (
-		    pack_1x128_32 (
-			in_over_1x128 (
-			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
-	    }
-
-	    w--;
-	    dst++;
-	    mask++;
-	}
-
-	while (w >= 8)
-	{
-	    /* First round */
-	    xmm_mask = load_128_unaligned ((__m128i*)mask);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    pack_cmp = _mm_movemask_epi8 (
-		_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-	    unpack_565_128_4x128 (xmm_dst,
-				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-	    /* preload next round */
-	    xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
-
-	    /* preload next round */
-	    if (pack_cmp != 0xffff)
-	    {
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst0, &xmm_dst1);
-	    }
-
-	    /* Second round */
-	    pack_cmp = _mm_movemask_epi8 (
-		_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-	    if (pack_cmp != 0xffff)
-	    {
-		in_over_2x128 (&xmm_src, &xmm_src,
-			       &xmm_alpha, &xmm_alpha,
-			       &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst2, &xmm_dst3);
-	    }
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_565_4x128_128 (
-		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-	    w -= 8;
-	    dst += 8;
-	    mask += 8;
-	}
-
-	while (w)
-	{
-	    m = *(uint32_t *) mask;
-
-	    if (m)
-	    {
-		d = *dst;
-		mmx_mask = unpack_32_1x128 (m);
-		mmx_dest = expand565_16_1x128 (d);
-
-		*dst = pack_565_32_16 (
-		    pack_1x128_32 (
-			in_over_1x128 (
-			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
-	    }
-
-	    w--;
-	    dst++;
-	    mask++;
-	}
-    }
-
-}
-
-static void
-sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
-                         pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    uint32_t d;
-    uint32_t src;
-    int32_t w;
-
-    __m128i xmm_alpha;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    uint8_t m = *mask++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (
-		    pix_multiply_1x128 (xmm_alpha,
-				       unpack_32_1x128 (m)),
-		    unpack_32_1x128 (d)));
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    xmm_mask = load_128_unaligned ((__m128i*)mask);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
-				&xmm_mask_lo, &xmm_mask_hi,
-				&xmm_mask_lo, &xmm_mask_hi);
-
-	    pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
-				&xmm_dst_lo, &xmm_dst_hi,
-				&xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    mask += 16;
-	    dst += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    uint8_t m = *mask++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (
-		    pix_multiply_1x128 (
-			xmm_alpha, unpack_32_1x128 (m)),
-		    unpack_32_1x128 (d)));
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_in_n_8 (pixman_implementation_t *imp,
-		       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    int dst_stride;
-    uint32_t d;
-    uint32_t src;
-    int32_t w;
-
-    __m128i xmm_alpha;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
-    src = src >> 24;
-
-    if (src == 0xff)
-	return;
-
-    if (src == 0x00)
-    {
-	pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
-		     8, dest_x, dest_y, width, height, src);
-
-	return;
-    }
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (
-		    xmm_alpha,
-		    unpack_32_1x128 (d)));
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-	    
-	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
-				&xmm_dst_lo, &xmm_dst_hi,
-				&xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    dst += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (
-		    xmm_alpha,
-		    unpack_32_1x128 (d)));
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_in_8_8 (pixman_implementation_t *imp,
-                       pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int src_stride, dst_stride;
-    int32_t w;
-    uint32_t s, d;
-
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    s = (uint32_t) *src++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (
-		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    xmm_src = load_128_unaligned ((__m128i*)src);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	    pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
-				&xmm_dst_lo, &xmm_dst_hi,
-				&xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    src += 16;
-	    dst += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    s = (uint32_t) *src++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
-			  pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint32_t d;
-
-    __m128i xmm_alpha;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    uint8_t m = *mask++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		_mm_adds_epu16 (
-		    pix_multiply_1x128 (
-			xmm_alpha, unpack_32_1x128 (m)),
-		    unpack_32_1x128 (d)));
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    xmm_mask = load_128_unaligned ((__m128i*)mask);
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
-				&xmm_mask_lo, &xmm_mask_hi,
-				&xmm_mask_lo, &xmm_mask_hi);
-
-	    xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
-	    xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-	    mask += 16;
-	    dst += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    uint8_t m = (uint32_t) *mask++;
-	    d = (uint32_t) *dst;
-
-	    *dst++ = (uint8_t) pack_1x128_32 (
-		_mm_adds_epu16 (
-		    pix_multiply_1x128 (
-			xmm_alpha, unpack_32_1x128 (m)),
-		    unpack_32_1x128 (d)));
-
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_add_n_8 (pixman_implementation_t *imp,
-			pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    int dst_stride;
-    int32_t w;
-    uint32_t src;
-
-    __m128i xmm_src;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    src >>= 24;
-
-    if (src == 0x00)
-	return;
-
-    if (src == 0xff)
-    {
-	pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
-		     8, dest_x, dest_y, width, height, 0xff);
-
-	return;
-    }
-
-    src = (src << 24) | (src << 16) | (src << 8) | src;
-    xmm_src = _mm_set_epi32 (src, src, src, src);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    *dst = (uint8_t)_mm_cvtsi128_si32 (
-		_mm_adds_epu8 (
-		    xmm_src,
-		    _mm_cvtsi32_si128 (*dst)));
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 16)
-	{
-	    save_128_aligned (
-		(__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned  ((__m128i*)dst)));
-
-	    dst += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    *dst = (uint8_t)_mm_cvtsi128_si32 (
-		_mm_adds_epu8 (
-		    xmm_src,
-		    _mm_cvtsi32_si128 (*dst)));
-
-	    w--;
-	    dst++;
-	}
-    }
-
-}
-
-static void
-sse2_composite_add_8_8 (pixman_implementation_t *imp,
-			pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint16_t t;
-
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	src = src_line;
-
-	dst_line += dst_stride;
-	src_line += src_stride;
-	w = width;
-
-	/* Small head */
-	while (w && (uintptr_t)dst & 3)
-	{
-	    t = (*dst) + (*src++);
-	    *dst++ = t | (0 - (t >> 8));
-	    w--;
-	}
-
-	sse2_combine_add_u (imp, op,
-			    (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
-
-	/* Small tail */
-	dst += w & 0xfffc;
-	src += w & 0xfffc;
-
-	w &= 3;
-
-	while (w)
-	{
-	    t = (*dst) + (*src++);
-	    *dst++ = t | (0 - (t >> 8));
-	    w--;
-	}
-    }
-
-}
-
-static void
-sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-
-	sse2_combine_add_u (imp, op, dst, src, NULL, width);
-    }
-}
-
-static void
-sse2_composite_add_n_8888 (pixman_implementation_t *imp,
-			   pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t *dst_line, *dst, src;
-    int dst_stride;
-
-    __m128i xmm_src;
-
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-    if (src == 0)
-	return;
-
-    if (src == ~0)
-    {
-	pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
-		     dest_x, dest_y, width, height, ~0);
-
-	return;
-    }
-
-    xmm_src = _mm_set_epi32 (src, src, src, src);
-    while (height--)
-    {
-	int w = width;
-	uint32_t d;
-
-	dst = dst_line;
-	dst_line += dst_stride;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    d = *dst;
-	    *dst++ =
-		_mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    save_128_aligned
-		((__m128i*)dst,
-		 _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
-
-	    dst += 4;
-	    w -= 4;
-	}
-
-	while (w--)
-	{
-	    d = *dst;
-	    *dst++ =
-		_mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
-						  _mm_cvtsi32_si128 (d)));
-	}
-    }
-}
-
-static void
-sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
-			     pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-
-    __m128i xmm_src;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-    if (src == 0)
-	return;
-    xmm_src = expand_pixel_32_1x128 (src);
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    uint8_t m = *mask++;
-	    if (m)
-	    {
-		*dst = pack_1x128_32
-		    (_mm_adds_epu16
-		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
-		      unpack_32_1x128 (*dst)));
-	    }
-	    dst++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    uint32_t m;
-            memcpy(&m, mask, sizeof(uint32_t));
-
-	    if (m)
-	    {
-		__m128i xmm_mask_lo, xmm_mask_hi;
-		__m128i xmm_dst_lo, xmm_dst_hi;
-
-		__m128i xmm_dst = load_128_aligned ((__m128i*)dst);
-		__m128i xmm_mask =
-		    _mm_unpacklo_epi8 (unpack_32_1x128(m),
-				       _mm_setzero_si128 ());
-
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
-					&xmm_mask_lo, &xmm_mask_hi);
-
-		pix_multiply_2x128 (&xmm_src, &xmm_src,
-				    &xmm_mask_lo, &xmm_mask_hi,
-				    &xmm_mask_lo, &xmm_mask_hi);
-
-		xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
-		xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
-
-		save_128_aligned (
-		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    mask += 4;
-	}
-
-	while (w)
-	{
-	    uint8_t m = *mask++;
-	    if (m)
-	    {
-		*dst = pack_1x128_32
-		    (_mm_adds_epu16
-		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
-		      unpack_32_1x128 (*dst)));
-	    }
-	    dst++;
-	    w--;
-	}
-    }
-}
-
-static pixman_bool_t
-sse2_blt (pixman_implementation_t *imp,
-          uint32_t *               src_bits,
-          uint32_t *               dst_bits,
-          int                      src_stride,
-          int                      dst_stride,
-          int                      src_bpp,
-          int                      dst_bpp,
-          int                      src_x,
-          int                      src_y,
-          int                      dest_x,
-          int                      dest_y,
-          int                      width,
-          int                      height)
-{
-    uint8_t *   src_bytes;
-    uint8_t *   dst_bytes;
-    int byte_width;
-
-    if (src_bpp != dst_bpp)
-	return FALSE;
-
-    if (src_bpp == 16)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-	src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
-	byte_width = 2 * width;
-	src_stride *= 2;
-	dst_stride *= 2;
-    }
-    else if (src_bpp == 32)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
-	byte_width = 4 * width;
-	src_stride *= 4;
-	dst_stride *= 4;
-    }
-    else
-    {
-	return FALSE;
-    }
-
-    while (height--)
-    {
-	int w;
-	uint8_t *s = src_bytes;
-	uint8_t *d = dst_bytes;
-	src_bytes += src_stride;
-	dst_bytes += dst_stride;
-	w = byte_width;
-
-	while (w >= 2 && ((uintptr_t)d & 3))
-	{
-            memmove(d, s, 2);
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((uintptr_t)d & 15))
-	{
-            memmove(d, s, 4);
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-
-	while (w >= 64)
-	{
-	    __m128i xmm0, xmm1, xmm2, xmm3;
-
-	    xmm0 = load_128_unaligned ((__m128i*)(s));
-	    xmm1 = load_128_unaligned ((__m128i*)(s + 16));
-	    xmm2 = load_128_unaligned ((__m128i*)(s + 32));
-	    xmm3 = load_128_unaligned ((__m128i*)(s + 48));
-
-	    save_128_aligned ((__m128i*)(d),    xmm0);
-	    save_128_aligned ((__m128i*)(d + 16), xmm1);
-	    save_128_aligned ((__m128i*)(d + 32), xmm2);
-	    save_128_aligned ((__m128i*)(d + 48), xmm3);
-
-	    s += 64;
-	    d += 64;
-	    w -= 64;
-	}
-
-	while (w >= 16)
-	{
-	    save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
-
-	    w -= 16;
-	    d += 16;
-	    s += 16;
-	}
-
-	while (w >= 4)
-	{
-            memmove(d, s, 4);
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-
-	if (w >= 2)
-	{
-            memmove(d, s, 2);
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-    }
-
-    return TRUE;
-}
-
-static void
-sse2_composite_copy_area (pixman_implementation_t *imp,
-                          pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    sse2_blt (imp, src_image->bits.bits,
-	      dest_image->bits.bits,
-	      src_image->bits.rowstride,
-	      dest_image->bits.rowstride,
-	      PIXMAN_FORMAT_BPP (src_image->bits.format),
-	      PIXMAN_FORMAT_BPP (dest_image->bits.format),
-	      src_x, src_y, dest_x, dest_y, width, height);
-}
-
-static void
-sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *src, *src_line, s;
-    uint32_t    *dst, *dst_line, d;
-    uint8_t         *mask, *mask_line;
-    int src_stride, mask_stride, dst_stride;
-    int32_t w;
-    __m128i ms;
-
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-        src = src_line;
-        src_line += src_stride;
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-
-        w = width;
-
-        while (w && (uintptr_t)dst & 15)
-        {
-            uint8_t m = *mask++;
-            s = 0xff000000 | *src++;
-            d = *dst;
-            ms = unpack_32_1x128 (s);
-
-            if (m != 0xff)
-            {
-		__m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
-		__m128i md = unpack_32_1x128 (d);
-
-                ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
-            }
-
-            *dst++ = pack_1x128_32 (ms);
-            w--;
-        }
-
-        while (w >= 4)
-        {
-            uint32_t m;
-            memcpy(&m, mask, sizeof(uint32_t));
-            xmm_src = _mm_or_si128 (
-		load_128_unaligned ((__m128i*)src), mask_ff000000);
-
-            if (m == 0xffffffff)
-            {
-                save_128_aligned ((__m128i*)dst, xmm_src);
-            }
-            else
-            {
-                xmm_dst = load_128_aligned ((__m128i*)dst);
-
-                xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
-                unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-                expand_alpha_rev_2x128 (
-		    xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-                in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			       &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
-			       &xmm_dst_lo, &xmm_dst_hi);
-
-                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
-
-            src += 4;
-            dst += 4;
-            mask += 4;
-            w -= 4;
-        }
-
-        while (w)
-        {
-            uint8_t m = *mask++;
-
-            if (m)
-            {
-                s = 0xff000000 | *src;
-
-                if (m == 0xff)
-                {
-                    *dst = s;
-                }
-                else
-                {
-		    __m128i ma, md, ms;
-
-                    d = *dst;
-
-		    ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
-		    md = unpack_32_1x128 (d);
-		    ms = unpack_32_1x128 (s);
-
-                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
-                }
-
-            }
-
-            src++;
-            dst++;
-            w--;
-        }
-    }
-
-}
-
-static void
-sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
-                                 pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *src, *src_line, s;
-    uint32_t    *dst, *dst_line, d;
-    uint8_t         *mask, *mask_line;
-    int src_stride, mask_stride, dst_stride;
-    int32_t w;
-
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-        src = src_line;
-        src_line += src_stride;
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-
-        w = width;
-
-        while (w && (uintptr_t)dst & 15)
-        {
-	    uint32_t sa;
-            uint8_t m = *mask++;
-
-            s = *src++;
-            d = *dst;
-
-	    sa = s >> 24;
-
-	    if (m)
-	    {
-		if (sa == 0xff && m == 0xff)
-		{
-		    *dst = s;
-		}
-		else
-		{
-		    __m128i ms, md, ma, msa;
-
-		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		    ms = unpack_32_1x128 (s);
-		    md = unpack_32_1x128 (d);
-
-		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-		}
-	    }
-
-	    dst++;
-            w--;
-        }
-
-        while (w >= 4)
-        {
-            uint32_t m;
-            memcpy(&m, mask, sizeof(uint32_t));
-
-	    if (m)
-	    {
-		xmm_src = load_128_unaligned ((__m128i*)src);
-
-		if (m == 0xffffffff && is_opaque (xmm_src))
-		{
-		    save_128_aligned ((__m128i *)dst, xmm_src);
-		}
-		else
-		{
-		    xmm_dst = load_128_aligned ((__m128i *)dst);
-
-		    xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
-		    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-		    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-		    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
-		    expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-		    in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
-				   &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-		    save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-		}
-	    }
-
-            src += 4;
-            dst += 4;
-            mask += 4;
-            w -= 4;
-        }
-
-        while (w)
-        {
-	    uint32_t sa;
-            uint8_t m = *mask++;
-
-            s = *src++;
-            d = *dst;
-
-	    sa = s >> 24;
-
-	    if (m)
-	    {
-		if (sa == 0xff && m == 0xff)
-		{
-		    *dst = s;
-		}
-		else
-		{
-		    __m128i ms, md, ma, msa;
-
-		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		    ms = unpack_32_1x128 (s);
-		    md = unpack_32_1x128 (d);
-
-		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-		}
-	    }
-
-	    dst++;
-            w--;
-        }
-    }
-
-}
-
-static void
-sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
-				    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src;
-    uint32_t    *dst_line, *dst;
-    __m128i xmm_src;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_dsta_hi, xmm_dsta_lo;
-    int dst_stride;
-    int32_t w;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    xmm_src = expand_pixel_32_1x128 (src);
-
-    while (height--)
-    {
-	dst = dst_line;
-
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    __m128i vd;
-
-	    vd = unpack_32_1x128 (*dst);
-
-	    *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
-					      xmm_src));
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-	    __m128i tmp_lo, tmp_hi;
-
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-	    expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
-
-	    tmp_lo = xmm_src;
-	    tmp_hi = xmm_src;
-
-	    over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
-			&xmm_dsta_lo, &xmm_dsta_hi,
-			&tmp_lo, &tmp_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
-
-	    w -= 4;
-	    dst += 4;
-	}
-
-	while (w)
-	{
-	    __m128i vd;
-
-	    vd = unpack_32_1x128 (*dst);
-
-	    *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
-					      xmm_src));
-	    w--;
-	    dst++;
-	}
-
-    }
-
-}
-
-static void
-sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
-				    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *src, *src_line, s;
-    uint32_t    *dst, *dst_line, d;
-    uint32_t    *mask, *mask_line;
-    uint32_t    m;
-    int src_stride, mask_stride, dst_stride;
-    int32_t w;
-
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-        src = src_line;
-        src_line += src_stride;
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-
-        w = width;
-
-        while (w && (uintptr_t)dst & 15)
-        {
-	    uint32_t sa;
-
-            s = *src++;
-            m = (*mask++) >> 24;
-            d = *dst;
-
-	    sa = s >> 24;
-
-	    if (m)
-	    {
-		if (sa == 0xff && m == 0xff)
-		{
-		    *dst = s;
-		}
-		else
-		{
-		    __m128i ms, md, ma, msa;
-
-		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		    ms = unpack_32_1x128 (s);
-		    md = unpack_32_1x128 (d);
-
-		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-		}
-	    }
-
-	    dst++;
-            w--;
-        }
-
-        while (w >= 4)
-        {
-	    xmm_mask = load_128_unaligned ((__m128i*)mask);
-
-	    if (!is_transparent (xmm_mask))
-	    {
-		xmm_src = load_128_unaligned ((__m128i*)src);
-
-		if (is_opaque (xmm_mask) && is_opaque (xmm_src))
-		{
-		    save_128_aligned ((__m128i *)dst, xmm_src);
-		}
-		else
-		{
-		    xmm_dst = load_128_aligned ((__m128i *)dst);
-
-		    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-		    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-		    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
-		    expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-		    in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
-				   &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-		    save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-		}
-	    }
-
-            src += 4;
-            dst += 4;
-            mask += 4;
-            w -= 4;
-        }
-
-        while (w)
-        {
-	    uint32_t sa;
-
-            s = *src++;
-            m = (*mask++) >> 24;
-            d = *dst;
-
-	    sa = s >> 24;
-
-	    if (m)
-	    {
-		if (sa == 0xff && m == 0xff)
-		{
-		    *dst = s;
-		}
-		else
-		{
-		    __m128i ms, md, ma, msa;
-
-		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		    ms = unpack_32_1x128 (s);
-		    md = unpack_32_1x128 (d);
-
-		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-		}
-	    }
-
-	    dst++;
-            w--;
-        }
-    }
-
-}
-
-/* A variant of 'sse2_combine_over_u' with minor tweaks */
-static force_inline void
-scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
-                                             const uint32_t* ps,
-                                             int32_t         w,
-                                             pixman_fixed_t  vx,
-                                             pixman_fixed_t  unit_x,
-                                             pixman_fixed_t  src_width_fixed,
-                                             pixman_bool_t   fully_transparent_src)
-{
-    uint32_t s, d;
-    const uint32_t* pm = NULL;
-
-    __m128i xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_src_lo, xmm_src_hi;
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-
-    if (fully_transparent_src)
-	return;
-
-    /* Align dst on a 16-byte boundary */
-    while (w && ((uintptr_t)pd & 15))
-    {
-	d = *pd;
-	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	*pd++ = core_combine_over_u_pixel_sse2 (s, d);
-	if (pm)
-	    pm++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i tmp;
-	uint32_t tmp1, tmp2, tmp3, tmp4;
-
-	tmp1 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp2 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp3 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp4 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
-
-	xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
-
-	if (is_opaque (xmm_src_hi))
-	{
-	    save_128_aligned ((__m128i*)pd, xmm_src_hi);
-	}
-	else if (!is_zero (xmm_src_hi))
-	{
-	    xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-	    unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-	    expand_alpha_2x128 (
-		xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
-	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			&xmm_alpha_lo, &xmm_alpha_hi,
-			&xmm_dst_lo, &xmm_dst_hi);
-
-	    /* rebuid the 4 pixel data and save*/
-	    save_128_aligned ((__m128i*)pd,
-			      pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	}
-
-	w -= 4;
-	pd += 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	d = *pd;
-	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	*pd++ = core_combine_over_u_pixel_sse2 (s, d);
-	if (pm)
-	    pm++;
-
-	w--;
-    }
-}
-
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
-		       uint32_t, uint32_t, COVER)
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
-		       uint32_t, uint32_t, NONE)
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
-		       uint32_t, uint32_t, PAD)
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,
-		       scaled_nearest_scanline_sse2_8888_8888_OVER,
-		       uint32_t, uint32_t, NORMAL)
-
-static force_inline void
-scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
-					       uint32_t *       dst,
-					       const uint32_t * src,
-					       int32_t          w,
-					       pixman_fixed_t   vx,
-					       pixman_fixed_t   unit_x,
-					       pixman_fixed_t   src_width_fixed,
-					       pixman_bool_t    zero_src)
-{
-    __m128i xmm_mask;
-    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
-    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-    __m128i xmm_alpha_lo, xmm_alpha_hi;
-
-    if (zero_src || (*mask >> 24) == 0)
-	return;
-
-    xmm_mask = create_mask_16_128 (*mask >> 24);
-
-    while (w && (uintptr_t)dst & 15)
-    {
-	uint32_t s = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	if (s)
-	{
-	    uint32_t d = *dst;
-
-	    __m128i ms = unpack_32_1x128 (s);
-	    __m128i alpha     = expand_alpha_1x128 (ms);
-	    __m128i dest      = xmm_mask;
-	    __m128i alpha_dst = unpack_32_1x128 (d);
-
-	    *dst = pack_1x128_32 (
-		in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
-	}
-	dst++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	uint32_t tmp1, tmp2, tmp3, tmp4;
-
-	tmp1 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp2 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp3 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp4 = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
-
-	if (!is_zero (xmm_src))
-	{
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-			        &xmm_alpha_lo, &xmm_alpha_hi);
-
-	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			   &xmm_alpha_lo, &xmm_alpha_hi,
-			   &xmm_mask, &xmm_mask,
-			   &xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned (
-		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	}
-
-	dst += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	uint32_t s = *(src + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	if (s)
-	{
-	    uint32_t d = *dst;
-
-	    __m128i ms = unpack_32_1x128 (s);
-	    __m128i alpha = expand_alpha_1x128 (ms);
-	    __m128i mask  = xmm_mask;
-	    __m128i dest  = unpack_32_1x128 (d);
-
-	    *dst = pack_1x128_32 (
-		in_over_1x128 (&ms, &alpha, &mask, &dest));
-	}
-
-	dst++;
-	w--;
-    }
-
-}
-
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
-			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
-			      uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
-
-#if PSHUFD_IS_FAST
-
-/***********************************************************************************/
-
-# define BILINEAR_DECLARE_VARIABLES						\
-    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);	\
-    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);	\
-    const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);		\
-    const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x,	\
-					   unit_x, -unit_x, unit_x, -unit_x);	\
-    const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4);		\
-    const __m128i xmm_zero = _mm_setzero_si128 ();				\
-    __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3,	\
-				   vx + unit_x * 2, -(vx + 1) - unit_x * 2,	\
-				   vx + unit_x * 1, -(vx + 1) - unit_x * 1,	\
-				   vx + unit_x * 0, -(vx + 1) - unit_x * 0);	\
-    __m128i xmm_wh_state;
-
-#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_)			\
-do {										\
-    int phase = phase_;								\
-    __m128i xmm_wh, xmm_a, xmm_b;						\
-    /* fetch 2x2 pixel block into sse2 registers */				\
-    __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);		\
-    __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);		\
-    vx += unit_x;								\
-    /* vertical interpolation */						\
-    xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);	\
-    xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);	\
-    xmm_a = _mm_add_epi16 (xmm_a, xmm_b);						\
-    /* calculate horizontal weights */						\
-    if (phase <= 0)								\
-    {										\
-	xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,		\
-					16 - BILINEAR_INTERPOLATION_BITS));	\
-	xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4);		\
-	phase = 0;								\
-    }										\
-    xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase,	\
-							   phase, phase));	\
-    /* horizontal interpolation */						\
-    xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 (		\
-		xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh);		\
-    /* shift the result */							\
-    pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2);		\
-} while (0)
-
-#else /************************************************************************/
-
-# define BILINEAR_DECLARE_VARIABLES						\
-    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);	\
-    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);	\
-    const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);		\
-    const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x,	\
-					  unit_x, -unit_x, unit_x, -unit_x);	\
-    const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4,		\
-					   unit_x * 4, -unit_x * 4);		\
-    const __m128i xmm_zero = _mm_setzero_si128 ();				\
-    __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1),		\
-				   vx, -(vx + 1), vx, -(vx + 1))
-
-#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase)			\
-do {										\
-    __m128i xmm_wh, xmm_a, xmm_b;						\
-    /* fetch 2x2 pixel block into sse2 registers */				\
-    __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]);		\
-    __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]);		\
-    (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */		\
-    vx += unit_x;								\
-    /* vertical interpolation */						\
-    xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt);	\
-    xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb);	\
-    xmm_a = _mm_add_epi16 (xmm_a, xmm_b);					\
-    /* calculate horizontal weights */						\
-    xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,			\
-					16 - BILINEAR_INTERPOLATION_BITS));	\
-    xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);					\
-    /* horizontal interpolation */						\
-    xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a);	\
-    xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh);		\
-    /* shift the result */							\
-    pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2);		\
-} while (0)
-
-/***********************************************************************************/
-
-#endif
-
-#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix);					\
-do {										\
-	__m128i xmm_pix;							\
-	BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1);			\
-	xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix);				\
-	xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix);				\
-	pix = _mm_cvtsi128_si32 (xmm_pix);					\
-} while(0)
-
-#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix);					\
-do {										\
-	__m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4;				\
-	BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0);			\
-	BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1);			\
-	BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2);			\
-	BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3);			\
-	xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2);			\
-	xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4);			\
-	pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3);				\
-} while(0)
-
-#define BILINEAR_SKIP_ONE_PIXEL()						\
-do {										\
-    vx += unit_x;								\
-    xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1);					\
-} while(0)
-
-#define BILINEAR_SKIP_FOUR_PIXELS()						\
-do {										\
-    vx += unit_x * 4;								\
-    xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4);					\
-} while(0)
-
-/***********************************************************************************/
-
-static force_inline void
-scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t *       dst,
-					     const uint32_t * mask,
-					     const uint32_t * src_top,
-					     const uint32_t * src_bottom,
-					     int32_t          w,
-					     int              wt,
-					     int              wb,
-					     pixman_fixed_t   vx_,
-					     pixman_fixed_t   unit_x_,
-					     pixman_fixed_t   max_vx,
-					     pixman_bool_t    zero_src)
-{
-    intptr_t vx = vx_;
-    intptr_t unit_x = unit_x_;
-    BILINEAR_DECLARE_VARIABLES;
-    uint32_t pix1, pix2;
-
-    while (w && ((uintptr_t)dst & 15))
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	*dst++ = pix1;
-	w--;
-    }
-
-    while ((w -= 4) >= 0) {
-	__m128i xmm_src;
-	BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
-	_mm_store_si128 ((__m128i *)dst, xmm_src);
-	dst += 4;
-    }
-
-    if (w & 2)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
-	*dst++ = pix1;
-	*dst++ = pix2;
-    }
-
-    if (w & 1)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	*dst = pix1;
-    }
-
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_NONE)
-
-static force_inline void
-scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t *       dst,
-					     const uint32_t * mask,
-					     const uint32_t * src_top,
-					     const uint32_t * src_bottom,
-					     int32_t          w,
-					     int              wt,
-					     int              wb,
-					     pixman_fixed_t   vx_,
-					     pixman_fixed_t   unit_x_,
-					     pixman_fixed_t   max_vx,
-					     pixman_bool_t    zero_src)
-{
-    intptr_t vx = vx_;
-    intptr_t unit_x = unit_x_;
-    BILINEAR_DECLARE_VARIABLES;
-    uint32_t pix1, pix2;
-
-    while (w && ((uintptr_t)dst & 15))
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	*dst++ = pix1 | 0xFF000000;
-	w--;
-    }
-
-    while ((w -= 4) >= 0) {
-	__m128i xmm_src;
-	BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
-	_mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
-	dst += 4;
-    }
-
-    if (w & 2)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
-	*dst++ = pix1 | 0xFF000000;
-	*dst++ = pix2 | 0xFF000000;
-    }
-
-    if (w & 1)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	*dst = pix1 | 0xFF000000;
-    }
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,
-			       scaled_bilinear_scanline_sse2_x888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,
-			       scaled_bilinear_scanline_sse2_x888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,
-			       scaled_bilinear_scanline_sse2_x888_8888_SRC,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_NONE)
-
-static force_inline void
-scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t *       dst,
-					      const uint32_t * mask,
-					      const uint32_t * src_top,
-					      const uint32_t * src_bottom,
-					      int32_t          w,
-					      int              wt,
-					      int              wb,
-					      pixman_fixed_t   vx_,
-					      pixman_fixed_t   unit_x_,
-					      pixman_fixed_t   max_vx,
-					      pixman_bool_t    zero_src)
-{
-    intptr_t vx = vx_;
-    intptr_t unit_x = unit_x_;
-    BILINEAR_DECLARE_VARIABLES;
-    uint32_t pix1, pix2;
-
-    while (w && ((uintptr_t)dst & 15))
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
-	if (pix1)
-	{
-	    pix2 = *dst;
-	    *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
-	}
-
-	w--;
-	dst++;
-    }
-
-    while (w  >= 4)
-    {
-	__m128i xmm_src;
-	__m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
-	__m128i xmm_alpha_hi, xmm_alpha_lo;
-
-	BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
-
-	if (!is_zero (xmm_src))
-	{
-	    if (is_opaque (xmm_src))
-	    {
-		save_128_aligned ((__m128i *)dst, xmm_src);
-	    }
-	    else
-	    {
-		__m128i xmm_dst = load_128_aligned ((__m128i *)dst);
-
-		unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-		over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
-			    &xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-	}
-
-	w -= 4;
-	dst += 4;
-    }
-
-    while (w)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
-	if (pix1)
-	{
-	    pix2 = *dst;
-	    *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
-	}
-
-	w--;
-	dst++;
-    }
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_NONE)
-
-static force_inline void
-scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t *       dst,
-						const uint8_t  * mask,
-						const uint32_t * src_top,
-						const uint32_t * src_bottom,
-						int32_t          w,
-						int              wt,
-						int              wb,
-						pixman_fixed_t   vx_,
-						pixman_fixed_t   unit_x_,
-						pixman_fixed_t   max_vx,
-						pixman_bool_t    zero_src)
-{
-    intptr_t vx = vx_;
-    intptr_t unit_x = unit_x_;
-    BILINEAR_DECLARE_VARIABLES;
-    uint32_t pix1, pix2;
-
-    while (w && ((uintptr_t)dst & 15))
-    {
-	uint32_t sa;
-	uint8_t m = *mask++;
-
-	if (m)
-	{
-	    BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	    sa = pix1 >> 24;
-
-	    if (sa == 0xff && m == 0xff)
-	    {
-		*dst = pix1;
-	    }
-	    else
-	    {
-		__m128i ms, md, ma, msa;
-
-		pix2 = *dst;
-		ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		ms = unpack_32_1x128 (pix1);
-		md = unpack_32_1x128 (pix2);
-
-		msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-	    }
-	}
-	else
-	{
-	    BILINEAR_SKIP_ONE_PIXEL ();
-	}
-
-	w--;
-	dst++;
-    }
-
-    while (w >= 4)
-    {
-        uint32_t m;
-
-	__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
-	__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-	__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
-        memcpy(&m, mask, sizeof(uint32_t));
-
-	if (m)
-	{
-	    BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
-
-	    if (m == 0xffffffff && is_opaque (xmm_src))
-	    {
-		save_128_aligned ((__m128i *)dst, xmm_src);
-	    }
-	    else
-	    {
-		xmm_dst = load_128_aligned ((__m128i *)dst);
-
-		xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
-		unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-		expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
-		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-		in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
-			       &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-		save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	    }
-	}
-	else
-	{
-	    BILINEAR_SKIP_FOUR_PIXELS ();
-	}
-
-	w -= 4;
-	dst += 4;
-	mask += 4;
-    }
-
-    while (w)
-    {
-	uint32_t sa;
-	uint8_t m = *mask++;
-
-	if (m)
-	{
-	    BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	    sa = pix1 >> 24;
-
-	    if (sa == 0xff && m == 0xff)
-	    {
-		*dst = pix1;
-	    }
-	    else
-	    {
-		__m128i ms, md, ma, msa;
-
-		pix2 = *dst;
-		ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
-		ms = unpack_32_1x128 (pix1);
-		md = unpack_32_1x128 (pix2);
-
-		msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
-		*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
-	    }
-	}
-	else
-	{
-	    BILINEAR_SKIP_ONE_PIXEL ();
-	}
-
-	w--;
-	dst++;
-    }
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       COVER, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       PAD, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       NONE, FLAG_HAVE_NON_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
-			       uint32_t, uint8_t, uint32_t,
-			       NORMAL, FLAG_HAVE_NON_SOLID_MASK)
-
-static force_inline void
-scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t *       dst,
-						const uint32_t * mask,
-						const uint32_t * src_top,
-						const uint32_t * src_bottom,
-						int32_t          w,
-						int              wt,
-						int              wb,
-						pixman_fixed_t   vx_,
-						pixman_fixed_t   unit_x_,
-						pixman_fixed_t   max_vx,
-						pixman_bool_t    zero_src)
-{
-    intptr_t vx = vx_;
-    intptr_t unit_x = unit_x_;
-    BILINEAR_DECLARE_VARIABLES;
-    uint32_t pix1;
-    __m128i xmm_mask;
-
-    if (zero_src || (*mask >> 24) == 0)
-	return;
-
-    xmm_mask = create_mask_16_128 (*mask >> 24);
-
-    while (w && ((uintptr_t)dst & 15))
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	if (pix1)
-	{
-		uint32_t d = *dst;
-
-		__m128i ms = unpack_32_1x128 (pix1);
-		__m128i alpha     = expand_alpha_1x128 (ms);
-		__m128i dest      = xmm_mask;
-		__m128i alpha_dst = unpack_32_1x128 (d);
-
-		*dst = pack_1x128_32
-			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
-	}
-
-	dst++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	__m128i xmm_src;
-	BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
-
-	if (!is_zero (xmm_src))
-	{
-	    __m128i xmm_src_lo, xmm_src_hi;
-	    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-	    __m128i xmm_alpha_lo, xmm_alpha_hi;
-
-	    xmm_dst = load_128_aligned ((__m128i*)dst);
-
-	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
-				&xmm_alpha_lo, &xmm_alpha_hi);
-
-	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
-			   &xmm_alpha_lo, &xmm_alpha_hi,
-			   &xmm_mask, &xmm_mask,
-			   &xmm_dst_lo, &xmm_dst_hi);
-
-	    save_128_aligned
-		((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-	}
-
-	dst += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-	if (pix1)
-	{
-		uint32_t d = *dst;
-
-		__m128i ms = unpack_32_1x128 (pix1);
-		__m128i alpha     = expand_alpha_1x128 (ms);
-		__m128i dest      = xmm_mask;
-		__m128i alpha_dst = unpack_32_1x128 (d);
-
-		*dst = pack_1x128_32
-			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
-	}
-
-	dst++;
-	w--;
-    }
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
-			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       COVER, FLAG_HAVE_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
-			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       PAD, FLAG_HAVE_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
-			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NONE, FLAG_HAVE_SOLID_MASK)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
-			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
-			       uint32_t, uint32_t, uint32_t,
-			       NORMAL, FLAG_HAVE_SOLID_MASK)
-
-static const pixman_fast_path_t sse2_fast_paths[] =
-{
-    /* PIXMAN_OP_OVER */
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
-    PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
-    PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
-    PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
-    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
-    
-    /* PIXMAN_OP_OVER_REVERSE */
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
-
-    /* PIXMAN_OP_ADD */
-    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
-    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
-    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
-    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
-
-    /* PIXMAN_OP_SRC */
-    PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
-
-    /* PIXMAN_OP_IN */
-    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
-    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
-    PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH_COVER  (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
-    SIMPLE_BILINEAR_FAST_PATH_COVER  (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
-    SIMPLE_BILINEAR_FAST_PATH_PAD    (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
-    SIMPLE_BILINEAR_FAST_PATH_PAD    (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
-    SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
-    SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
-
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
-    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
-    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
-
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
-    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
-
-    { PIXMAN_OP_NONE },
-};
-
-static uint32_t *
-sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    __m128i ff000000 = mask_ff000000;
-    uint32_t *dst = iter->buffer;
-    uint32_t *src = (uint32_t *)iter->bits;
-
-    iter->bits += iter->stride;
-
-    while (w && ((uintptr_t)dst) & 0x0f)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	save_128_aligned (
-	    (__m128i *)dst, _mm_or_si128 (
-		load_128_unaligned ((__m128i *)src), ff000000));
-
-	dst += 4;
-	src += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint16_t *src = (uint16_t *)iter->bits;
-    __m128i ff000000 = mask_ff000000;
-
-    iter->bits += iter->stride;
-
-    while (w && ((uintptr_t)dst) & 0x0f)
-    {
-	uint16_t s = *src++;
-
-	*dst++ = convert_0565_to_8888 (s);
-	w--;
-    }
-
-    while (w >= 8)
-    {
-	__m128i lo, hi, s;
-
-	s = _mm_loadu_si128 ((__m128i *)src);
-
-	lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
-	hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
-
-	save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
-	save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
-
-	dst += 8;
-	src += 8;
-	w -= 8;
-    }
-
-    while (w)
-    {
-	uint16_t s = *src++;
-
-	*dst++ = convert_0565_to_8888 (s);
-	w--;
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint8_t *src = iter->bits;
-    __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
-
-    iter->bits += iter->stride;
-
-    while (w && (((uintptr_t)dst) & 15))
-    {
-        *dst++ = (uint32_t)(*(src++)) << 24;
-        w--;
-    }
-
-    while (w >= 16)
-    {
-	xmm0 = _mm_loadu_si128((__m128i *)src);
-
-	xmm1 = _mm_unpacklo_epi8  (_mm_setzero_si128(), xmm0);
-	xmm2 = _mm_unpackhi_epi8  (_mm_setzero_si128(), xmm0);
-	xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
-	xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
-	xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
-	xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
-
-	_mm_store_si128(((__m128i *)(dst +  0)), xmm3);
-	_mm_store_si128(((__m128i *)(dst +  4)), xmm4);
-	_mm_store_si128(((__m128i *)(dst +  8)), xmm5);
-	_mm_store_si128(((__m128i *)(dst + 12)), xmm6);
-
-	dst += 16;
-	src += 16;
-	w -= 16;
-    }
-
-    while (w)
-    {
-	*dst++ = (uint32_t)(*(src++)) << 24;
-	w--;
-    }
-
-    return iter->buffer;
-}
-
-#define IMAGE_FLAGS							\
-    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
-     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
-static const pixman_iter_info_t sse2_iters[] = 
-{
-    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
-    },
-    { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
-    },
-    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL
-    },
-    { PIXMAN_null },
-};
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-pixman_implementation_t *
-_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
-
-    /* SSE2 constants */
-    mask_565_r  = create_mask_2x32_128 (0x00f80000, 0x00f80000);
-    mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
-    mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
-    mask_565_b  = create_mask_2x32_128 (0x0000001f, 0x0000001f);
-    mask_red   = create_mask_2x32_128 (0x00f80000, 0x00f80000);
-    mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
-    mask_blue  = create_mask_2x32_128 (0x000000f8, 0x000000f8);
-    mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
-    mask_565_fix_g = create_mask_2x32_128  (0x0000c000, 0x0000c000);
-    mask_0080 = create_mask_16_128 (0x0080);
-    mask_00ff = create_mask_16_128 (0x00ff);
-    mask_0101 = create_mask_16_128 (0x0101);
-    mask_ffff = create_mask_16_128 (0xffff);
-    mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
-    mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
-    mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8);
-    mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004);
-
-    /* Set up function pointers */
-    imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
-    imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
-
-    imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
-
-    imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
-
-    imp->blt = sse2_blt;
-    imp->fill = sse2_fill;
-
-    imp->iter_info = sse2_iters;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-ssse3.c b/vendor/pixman/pixman/pixman-ssse3.c
deleted file mode 100644
index 0359895af..000000000
--- a/vendor/pixman/pixman/pixman-ssse3.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright © 2013 Soren Sandmann Pedersen
- * Copyright © 2013 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Soren Sandmann (soren.sandmann@gmail.com)
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdlib.h>
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <tmmintrin.h>
-#include "pixman-private.h"
-#include "pixman-inlines.h"
-
-typedef struct
-{
-    int		y;
-    uint64_t *	buffer;
-} line_t;
-
-typedef struct
-{
-    line_t		lines[2];
-    pixman_fixed_t	y;
-    pixman_fixed_t	x;
-    uint64_t		data[1];
-} bilinear_info_t;
-
-static void
-ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
-			int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
-{
-    uint32_t *bits = image->bits + y * image->rowstride;
-    __m128i vx = _mm_set_epi16 (
-	- (x + 1), x, - (x + 1), x,
-	- (x + ux + 1), x + ux,  - (x + ux + 1), x + ux);
-    __m128i vux = _mm_set_epi16 (
-	- 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
-	- 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
-    __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
-    __m128i *b = (__m128i *)line->buffer;
-    __m128i vrl0, vrl1;
-
-    while ((n -= 2) >= 0)
-    {
-	__m128i vw, vr, s;
-
-	vrl1 = _mm_loadl_epi64 (
-	    (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
-	/* vrl1: R1, L1 */
-
-    final_pixel:
-	vrl0 = _mm_loadl_epi64 (
-	    (__m128i *)(bits + pixman_fixed_to_int (x)));
-	/* vrl0: R0, L0 */
-
-	/* The weights are based on vx which is a vector of 
-	 *
-	 *    - (x + 1), x, - (x + 1), x,
-	 *          - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
-	 *
-	 * so the 16 bit weights end up like this:
-	 *
-	 *    iw0, w0, iw0, w0, iw1, w1, iw1, w1
-	 *
-	 * and after shifting and packing, we get these bytes:
-	 *
-	 *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
-	 *        iw0, w0, iw0, w0, iw1, w1, iw1, w1,
-	 *
-	 * which means the first and the second input pixel 
-	 * have to be interleaved like this:
-	 *
-	 *    la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
-	 *        lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
-	 *
-	 * before maddubsw can be used.
-	 */
-
-	vw = _mm_add_epi16 (
-	    vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
-	/* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
-	 */
-
-	vw = _mm_packus_epi16 (vw, vw);
-	/* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
-	 *         iw0, w0, iw0, w0, iw1, w1, iw1, w1
-	 */
-	vx = _mm_add_epi16 (vx, vux);
-
-	x += 2 * ux;
-
-	vr = _mm_unpacklo_epi16 (vrl1, vrl0);
-	/* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
-
-	s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
-	/* s:  lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
-
-	vr = _mm_unpackhi_epi8 (vr, s);
-	/* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
-	 *         lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
-	 */
-
-	vr = _mm_maddubs_epi16 (vr, vw);
-
-	/* When the weight is 0, the inverse weight is
-	 * 128 which can't be represented in a signed byte.
-	 * As a result maddubsw computes the following:
-	 *
-	 *     r = l * -128 + r * 0
-	 *
-	 * rather than the desired
-	 *
-	 *     r = l * 128 + r * 0
-	 *
-	 * We fix this by taking the absolute value of the
-	 * result.
-	 */
-	vr = _mm_abs_epi16 (vr);
-
-	/* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
-	_mm_store_si128 (b++, vr);
-    }
-
-    if (n == -1)
-    {
-	vrl1 = _mm_setzero_si128();
-	goto final_pixel;
-    }
-
-    line->y = y;
-}
-
-static uint32_t *
-ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
-{
-    pixman_fixed_t fx, ux;
-    bilinear_info_t *info = iter->data;
-    line_t *line0, *line1;
-    int y0, y1;
-    int32_t dist_y;
-    __m128i vw;
-    int i;
-
-    fx = info->x;
-    ux = iter->image->common.transform->matrix[0][0];
-
-    y0 = pixman_fixed_to_int (info->y);
-    y1 = y0 + 1;
-
-    line0 = &info->lines[y0 & 0x01];
-    line1 = &info->lines[y1 & 0x01];
-
-    if (line0->y != y0)
-    {
-	ssse3_fetch_horizontal (
-	    &iter->image->bits, line0, y0, fx, ux, iter->width);
-    }
-
-    if (line1->y != y1)
-    {
-	ssse3_fetch_horizontal (
-	    &iter->image->bits, line1, y1, fx, ux, iter->width);
-    }
-
-    dist_y = pixman_fixed_to_bilinear_weight (info->y);
-    dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
-
-    vw = _mm_set_epi16 (
-	dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
-
-    for (i = 0; i + 3 < iter->width; i += 4)
-    {
-	__m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
-	__m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
-	__m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
-	__m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
-	__m128i r0, r1, tmp, p;
-
-	r0 = _mm_mulhi_epu16 (
-	    _mm_sub_epi16 (bot0, top0), vw);
-	tmp = _mm_cmplt_epi16 (bot0, top0);
-	tmp = _mm_and_si128 (tmp, vw);
-	r0 = _mm_sub_epi16 (r0, tmp);
-	r0 = _mm_add_epi16 (r0, top0);
-	r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
-	/* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
-	r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
-	/* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
-
-	r1 = _mm_mulhi_epu16 (
-	    _mm_sub_epi16 (bot1, top1), vw);
-	tmp = _mm_cmplt_epi16 (bot1, top1);
-	tmp = _mm_and_si128 (tmp, vw);
-	r1 = _mm_sub_epi16 (r1, tmp);
-	r1 = _mm_add_epi16 (r1, top1);
-	r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
-	r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
-	/* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
-
-	p = _mm_packus_epi16 (r0, r1);
-
-	_mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
-    }
-
-    while (i < iter->width)
-    {
-	__m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
-	__m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
-	__m128i r0, tmp, p;
-
-	r0 = _mm_mulhi_epu16 (
-	    _mm_sub_epi16 (bot0, top0), vw);
-	tmp = _mm_cmplt_epi16 (bot0, top0);
-	tmp = _mm_and_si128 (tmp, vw);
-	r0 = _mm_sub_epi16 (r0, tmp);
-	r0 = _mm_add_epi16 (r0, top0);
-	r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
-	/* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
-	r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
-	/* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
-
-	p = _mm_packus_epi16 (r0, r0);
-
-	if (iter->width - i == 1)
-	{
-	    *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
-	    i++;
-	}
-	else
-	{
-	    _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
-	    i += 2;
-	}
-    }
-    
-    info->y += iter->image->common.transform->matrix[1][1];
-
-    return iter->buffer;
-}
-
-static void
-ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
-{
-    free (iter->data);
-}
-
-static void
-ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
-{
-    int width = iter->width;
-    bilinear_info_t *info;
-    pixman_vector_t v;
-
-    /* Reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (!pixman_transform_point_3d (iter->image->common.transform, &v))
-	goto fail;
-
-    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
-    if (!info)
-	goto fail;
-
-    info->x = v.vector[0] - pixman_fixed_1 / 2;
-    info->y = v.vector[1] - pixman_fixed_1 / 2;
-
-#define ALIGN(addr)							\
-    ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
-
-    /* It is safe to set the y coordinates to -1 initially
-     * because COVER_CLIP_BILINEAR ensures that we will only
-     * be asked to fetch lines in the [0, height) interval
-     */
-    info->lines[0].y = -1;
-    info->lines[0].buffer = ALIGN (&(info->data[0]));
-    info->lines[1].y = -1;
-    info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
-
-    iter->get_scanline = ssse3_fetch_bilinear_cover;
-    iter->fini = ssse3_bilinear_cover_iter_fini;
-
-    iter->data = info;
-    return;
-
-fail:
-    /* Something went wrong, either a bad matrix or OOM; in such cases,
-     * we don't guarantee any particular rendering.
-     */
-    _pixman_log_error (
-	FUNC, "Allocation failure or bad matrix, skipping rendering\n");
-    
-    iter->get_scanline = _pixman_iter_get_scanline_noop;
-    iter->fini = NULL;
-}
-
-static const pixman_iter_info_t ssse3_iters[] = 
-{
-    { PIXMAN_a8r8g8b8,
-      (FAST_PATH_STANDARD_FLAGS			|
-       FAST_PATH_SCALE_TRANSFORM		|
-       FAST_PATH_BILINEAR_FILTER		|
-       FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
-      ITER_NARROW | ITER_SRC,
-      ssse3_bilinear_cover_iter_init,
-      NULL, NULL
-    },
-
-    { PIXMAN_null },
-};
-
-static const pixman_fast_path_t ssse3_fast_paths[] =
-{
-    { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp =
-	_pixman_implementation_create (fallback, ssse3_fast_paths);
-
-    imp->iter_info = ssse3_iters;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-timer.c b/vendor/pixman/pixman/pixman-timer.c
deleted file mode 100644
index 656d90017..000000000
--- a/vendor/pixman/pixman/pixman-timer.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "pixman-private.h"
-
-#ifdef PIXMAN_TIMERS
-
-static pixman_timer_t *timers;
-
-static void
-dump_timers (void)
-{
-    pixman_timer_t *timer;
-
-    for (timer = timers; timer != NULL; timer = timer->next)
-    {
-	printf ("%s:   total: %llu     n: %llu      avg: %f\n",
-	        timer->name,
-	        timer->total,
-	        timer->n_times,
-	        timer->total / (double)timer->n_times);
-    }
-}
-
-void
-pixman_timer_register (pixman_timer_t *timer)
-{
-    static int initialized;
-
-    int atexit (void (*function)(void));
-
-    if (!initialized)
-    {
-	atexit (dump_timers);
-	initialized = 1;
-    }
-
-    timer->next = timers;
-    timers = timer;
-}
-
-#endif
diff --git a/vendor/pixman/pixman/pixman-trap.c b/vendor/pixman/pixman/pixman-trap.c
deleted file mode 100644
index 0ec73dc65..000000000
--- a/vendor/pixman/pixman/pixman-trap.c
+++ /dev/null
@@ -1,711 +0,0 @@
-/*
- * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright © 2004 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-
-/*
- * Compute the smallest value greater than or equal to y which is on a
- * grid row.
- */
-
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_ceil_y (pixman_fixed_t y, int n)
-{
-    pixman_fixed_t f = pixman_fixed_frac (y);
-    pixman_fixed_t i = pixman_fixed_floor (y);
-
-    f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
-	Y_FRAC_FIRST (n);
-    
-    if (f > Y_FRAC_LAST (n))
-    {
-	if (pixman_fixed_to_int (i) == 0x7fff)
-	{
-	    f = 0xffff; /* saturate */
-	}
-	else
-	{
-	    f = Y_FRAC_FIRST (n);
-	    i += pixman_fixed_1;
-	}
-    }
-    return (i | f);
-}
-
-/*
- * Compute the largest value strictly less than y which is on a
- * grid row.
- */
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_floor_y (pixman_fixed_t y,
-                       int            n)
-{
-    pixman_fixed_t f = pixman_fixed_frac (y);
-    pixman_fixed_t i = pixman_fixed_floor (y);
-
-    f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
-	Y_FRAC_FIRST (n);
-
-    if (f < Y_FRAC_FIRST (n))
-    {
-	if (pixman_fixed_to_int (i) == 0xffff8000)
-	{
-	    f = 0; /* saturate */
-	}
-	else
-	{
-	    f = Y_FRAC_LAST (n);
-	    i -= pixman_fixed_1;
-	}
-    }
-    return (i | f);
-}
-
-/*
- * Step an edge by any amount (including negative values)
- */
-PIXMAN_EXPORT void
-pixman_edge_step (pixman_edge_t *e,
-                  int            n)
-{
-    pixman_fixed_48_16_t ne;
-
-    e->x += n * e->stepx;
-
-    ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
-
-    if (n >= 0)
-    {
-	if (ne > 0)
-	{
-	    int nx = (ne + e->dy - 1) / e->dy;
-	    e->e = ne - nx * (pixman_fixed_48_16_t) e->dy;
-	    e->x += nx * e->signdx;
-	}
-    }
-    else
-    {
-	if (ne <= -e->dy)
-	{
-	    int nx = (-ne) / e->dy;
-	    e->e = ne + nx * (pixman_fixed_48_16_t) e->dy;
-	    e->x -= nx * e->signdx;
-	}
-    }
-}
-
-/*
- * A private routine to initialize the multi-step
- * elements of an edge structure
- */
-static void
-_pixman_edge_multi_init (pixman_edge_t * e,
-                         int             n,
-                         pixman_fixed_t *stepx_p,
-                         pixman_fixed_t *dx_p)
-{
-    pixman_fixed_t stepx;
-    pixman_fixed_48_16_t ne;
-
-    ne = n * (pixman_fixed_48_16_t) e->dx;
-    stepx = n * e->stepx;
-
-    if (ne > 0)
-    {
-	int nx = ne / e->dy;
-	ne -= nx * (pixman_fixed_48_16_t)e->dy;
-	stepx += nx * e->signdx;
-    }
-
-    *dx_p = ne;
-    *stepx_p = stepx;
-}
-
-/*
- * Initialize one edge structure given the line endpoints and a
- * starting y value
- */
-PIXMAN_EXPORT void
-pixman_edge_init (pixman_edge_t *e,
-                  int            n,
-                  pixman_fixed_t y_start,
-                  pixman_fixed_t x_top,
-                  pixman_fixed_t y_top,
-                  pixman_fixed_t x_bot,
-                  pixman_fixed_t y_bot)
-{
-    pixman_fixed_t dx, dy;
-
-    e->x = x_top;
-    e->e = 0;
-    dx = x_bot - x_top;
-    dy = y_bot - y_top;
-    e->dy = dy;
-    e->dx = 0;
-
-    if (dy)
-    {
-	if (dx >= 0)
-	{
-	    e->signdx = 1;
-	    e->stepx = dx / dy;
-	    e->dx = dx % dy;
-	    e->e = -dy;
-	}
-	else
-	{
-	    e->signdx = -1;
-	    e->stepx = -(-dx / dy);
-	    e->dx = -dx % dy;
-	    e->e = 0;
-	}
-
-	_pixman_edge_multi_init (e, STEP_Y_SMALL (n),
-				 &e->stepx_small, &e->dx_small);
-
-	_pixman_edge_multi_init (e, STEP_Y_BIG (n),
-				 &e->stepx_big, &e->dx_big);
-    }
-    pixman_edge_step (e, y_start - y_top);
-}
-
-/*
- * Initialize one edge structure given a line, starting y value
- * and a pixel offset for the line
- */
-PIXMAN_EXPORT void
-pixman_line_fixed_edge_init (pixman_edge_t *            e,
-                             int                        n,
-                             pixman_fixed_t             y,
-                             const pixman_line_fixed_t *line,
-                             int                        x_off,
-                             int                        y_off)
-{
-    pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off);
-    pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off);
-    const pixman_point_fixed_t *top, *bot;
-
-    if (line->p1.y <= line->p2.y)
-    {
-	top = &line->p1;
-	bot = &line->p2;
-    }
-    else
-    {
-	top = &line->p2;
-	bot = &line->p1;
-    }
-    
-    pixman_edge_init (e, n, y,
-                      top->x + x_off_fixed,
-                      top->y + y_off_fixed,
-                      bot->x + x_off_fixed,
-                      bot->y + y_off_fixed);
-}
-
-PIXMAN_EXPORT void
-pixman_add_traps (pixman_image_t *     image,
-                  int16_t              x_off,
-                  int16_t              y_off,
-                  int                  ntrap,
-                  const pixman_trap_t *traps)
-{
-    int bpp;
-    int height;
-
-    pixman_fixed_t x_off_fixed;
-    pixman_fixed_t y_off_fixed;
-    pixman_edge_t l, r;
-    pixman_fixed_t t, b;
-
-    _pixman_image_validate (image);
-    
-    height = image->bits.height;
-    bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
-    x_off_fixed = pixman_int_to_fixed (x_off);
-    y_off_fixed = pixman_int_to_fixed (y_off);
-
-    while (ntrap--)
-    {
-	t = traps->top.y + y_off_fixed;
-	if (t < 0)
-	    t = 0;
-	t = pixman_sample_ceil_y (t, bpp);
-
-	b = traps->bot.y + y_off_fixed;
-	if (pixman_fixed_to_int (b) >= height)
-	    b = pixman_int_to_fixed (height) - 1;
-	b = pixman_sample_floor_y (b, bpp);
-
-	if (b >= t)
-	{
-	    /* initialize edge walkers */
-	    pixman_edge_init (&l, bpp, t,
-	                      traps->top.l + x_off_fixed,
-	                      traps->top.y + y_off_fixed,
-	                      traps->bot.l + x_off_fixed,
-	                      traps->bot.y + y_off_fixed);
-
-	    pixman_edge_init (&r, bpp, t,
-	                      traps->top.r + x_off_fixed,
-	                      traps->top.y + y_off_fixed,
-	                      traps->bot.r + x_off_fixed,
-	                      traps->bot.y + y_off_fixed);
-
-	    pixman_rasterize_edges (image, &l, &r, t, b);
-	}
-
-	traps++;
-    }
-}
-
-#if 0
-static void
-dump_image (pixman_image_t *image,
-            const char *    title)
-{
-    int i, j;
-
-    if (!image->type == BITS)
-	printf ("%s is not a regular image\n", title);
-
-    if (!image->bits.format == PIXMAN_a8)
-	printf ("%s is not an alpha mask\n", title);
-
-    printf ("\n\n\n%s: \n", title);
-
-    for (i = 0; i < image->bits.height; ++i)
-    {
-	uint8_t *line =
-	    (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
-
-	for (j = 0; j < image->bits.width; ++j)
-	    printf ("%c", line[j] ? '#' : ' ');
-
-	printf ("\n");
-    }
-}
-#endif
-
-PIXMAN_EXPORT void
-pixman_add_trapezoids (pixman_image_t *          image,
-                       int16_t                   x_off,
-                       int                       y_off,
-                       int                       ntraps,
-                       const pixman_trapezoid_t *traps)
-{
-    int i;
-
-#if 0
-    dump_image (image, "before");
-#endif
-
-    for (i = 0; i < ntraps; ++i)
-    {
-	const pixman_trapezoid_t *trap = &(traps[i]);
-
-	if (!pixman_trapezoid_valid (trap))
-	    continue;
-
-	pixman_rasterize_trapezoid (image, trap, x_off, y_off);
-    }
-
-#if 0
-    dump_image (image, "after");
-#endif
-}
-
-PIXMAN_EXPORT void
-pixman_rasterize_trapezoid (pixman_image_t *          image,
-                            const pixman_trapezoid_t *trap,
-                            int                       x_off,
-                            int                       y_off)
-{
-    int bpp;
-    int height;
-
-    pixman_fixed_t y_off_fixed;
-    pixman_edge_t l, r;
-    pixman_fixed_t t, b;
-
-    return_if_fail (image->type == BITS);
-
-    _pixman_image_validate (image);
-    
-    if (!pixman_trapezoid_valid (trap))
-	return;
-
-    height = image->bits.height;
-    bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
-    y_off_fixed = pixman_int_to_fixed (y_off);
-
-    t = trap->top + y_off_fixed;
-    if (t < 0)
-	t = 0;
-    t = pixman_sample_ceil_y (t, bpp);
-
-    b = trap->bottom + y_off_fixed;
-    if (pixman_fixed_to_int (b) >= height)
-	b = pixman_int_to_fixed (height) - 1;
-    b = pixman_sample_floor_y (b, bpp);
-    
-    if (b >= t)
-    {
-	/* initialize edge walkers */
-	pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
-	pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);
-
-	pixman_rasterize_edges (image, &l, &r, t, b);
-    }
-}
-
-static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] =
-{
-    FALSE,	/* Clear		0			0    */
-    FALSE,	/* Src			1			0    */
-    TRUE,	/* Dst			0			1    */
-    TRUE,	/* Over			1			1-Aa */
-    TRUE,	/* OverReverse		1-Ab			1    */
-    FALSE,	/* In			Ab			0    */
-    FALSE,	/* InReverse		0			Aa   */
-    FALSE,	/* Out			1-Ab			0    */
-    TRUE,	/* OutReverse		0			1-Aa */
-    TRUE,	/* Atop			Ab			1-Aa */
-    FALSE,	/* AtopReverse		1-Ab			Aa   */
-    TRUE,	/* Xor			1-Ab			1-Aa */
-    TRUE,	/* Add			1			1    */
-};
-
-static pixman_bool_t
-get_trap_extents (pixman_op_t op, pixman_image_t *dest,
-		  const pixman_trapezoid_t *traps, int n_traps,
-		  pixman_box32_t *box)
-{
-    int i;
-
-    /* When the operator is such that a zero source has an
-     * effect on the underlying image, we have to
-     * composite across the entire destination
-     */
-    if (!zero_src_has_no_effect [op])
-    {
-	box->x1 = 0;
-	box->y1 = 0;
-	box->x2 = dest->bits.width;
-	box->y2 = dest->bits.height;
-	return TRUE;
-    }
-    
-    box->x1 = INT32_MAX;
-    box->y1 = INT32_MAX;
-    box->x2 = INT32_MIN;
-    box->y2 = INT32_MIN;
-	
-    for (i = 0; i < n_traps; ++i)
-    {
-	const pixman_trapezoid_t *trap = &(traps[i]);
-	int y1, y2;
-	    
-	if (!pixman_trapezoid_valid (trap))
-	    continue;
-	    
-	y1 = pixman_fixed_to_int (trap->top);
-	if (y1 < box->y1)
-	    box->y1 = y1;
-	    
-	y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
-	if (y2 > box->y2)
-	    box->y2 = y2;
-	    
-#define EXTEND_MIN(x)							\
-	if (pixman_fixed_to_int ((x)) < box->x1)			\
-	    box->x1 = pixman_fixed_to_int ((x));
-#define EXTEND_MAX(x)							\
-	if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2)	\
-	    box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
-	    
-#define EXTEND(x)							\
-	EXTEND_MIN(x);							\
-	EXTEND_MAX(x);
-	    
-	EXTEND(trap->left.p1.x);
-	EXTEND(trap->left.p2.x);
-	EXTEND(trap->right.p1.x);
-	EXTEND(trap->right.p2.x);
-    }
-	
-    if (box->x1 >= box->x2 || box->y1 >= box->y2)
-	return FALSE;
-
-    return TRUE;
-}
-
-/*
- * pixman_composite_trapezoids()
- *
- * All the trapezoids are conceptually rendered to an infinitely big image.
- * The (0, 0) coordinates of this image are then aligned with the (x, y)
- * coordinates of the source image, and then both images are aligned with
- * the (x, y) coordinates of the destination. Then these three images are
- * composited across the entire destination.
- */
-PIXMAN_EXPORT void
-pixman_composite_trapezoids (pixman_op_t		op,
-			     pixman_image_t *		src,
-			     pixman_image_t *		dst,
-			     pixman_format_code_t	mask_format,
-			     int			x_src,
-			     int			y_src,
-			     int			x_dst,
-			     int			y_dst,
-			     int			n_traps,
-			     const pixman_trapezoid_t *	traps)
-{
-    int i;
-
-    return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A);
-    
-    if (n_traps <= 0)
-	return;
-
-    _pixman_image_validate (src);
-    _pixman_image_validate (dst);
-
-    if (op == PIXMAN_OP_ADD &&
-	(src->common.flags & FAST_PATH_IS_OPAQUE)		&&
-	(mask_format == dst->common.extended_format_code)	&&
-	!(dst->common.have_clip_region))
-    {
-	for (i = 0; i < n_traps; ++i)
-	{
-	    const pixman_trapezoid_t *trap = &(traps[i]);
-	    
-	    if (!pixman_trapezoid_valid (trap))
-		continue;
-	    
-	    pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst);
-	}
-    }
-    else
-    {
-	pixman_image_t *tmp;
-	pixman_box32_t box;
-	int i;
-
-	if (!get_trap_extents (op, dst, traps, n_traps, &box))
-	    return;
-	
-	if (!(tmp = pixman_image_create_bits (
-		  mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1)))
-	    return;
-	
-	for (i = 0; i < n_traps; ++i)
-	{
-	    const pixman_trapezoid_t *trap = &(traps[i]);
-	    
-	    if (!pixman_trapezoid_valid (trap))
-		continue;
-	    
-	    pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
-	}
-	
-	pixman_image_composite (op, src, tmp, dst,
-				x_src + box.x1, y_src + box.y1,
-				0, 0,
-				x_dst + box.x1, y_dst + box.y1,
-				box.x2 - box.x1, box.y2 - box.y1);
-	
-	pixman_image_unref (tmp);
-    }
-}
-
-static int
-greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
-{
-    if (a->y == b->y)
-	return a->x > b->x;
-    return a->y > b->y;
-}
-
-/*
- * Note that the definition of this function is a bit odd because
- * of the X coordinate space (y increasing downwards).
- */
-static int
-clockwise (const pixman_point_fixed_t *ref,
-	   const pixman_point_fixed_t *a,
-	   const pixman_point_fixed_t *b)
-{
-    pixman_point_fixed_t	ad, bd;
-
-    ad.x = a->x - ref->x;
-    ad.y = a->y - ref->y;
-    bd.x = b->x - ref->x;
-    bd.y = b->y - ref->y;
-
-    return ((pixman_fixed_32_32_t) bd.y * ad.x -
-	    (pixman_fixed_32_32_t) ad.y * bd.x) < 0;
-}
-
-static void
-triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
-{
-    const pixman_point_fixed_t *top, *left, *right, *tmp;
-
-    top = &tri->p1;
-    left = &tri->p2;
-    right = &tri->p3;
-
-    if (greater_y (top, left))
-    {
-	tmp = left;
-	left = top;
-	top = tmp;
-    }
-
-    if (greater_y (top, right))
-    {
-	tmp = right;
-	right = top;
-	top = tmp;
-    }
-
-    if (clockwise (top, right, left))
-    {
-	tmp = right;
-	right = left;
-	left = tmp;
-    }
-    
-    /*
-     * Two cases:
-     *
-     *		+		+
-     *	       / \             / \
-     *	      /   \           /	  \
-     *	     /     +         +	   \
-     *      /    --           --    \
-     *     /   --               --   \
-     *    / ---                   --- \
-     *	 +--                         --+
-     */
-
-    traps->top = top->y;
-    traps->left.p1 = *top;
-    traps->left.p2 = *left;
-    traps->right.p1 = *top;
-    traps->right.p2 = *right;
-
-    if (right->y < left->y)
-	traps->bottom = right->y;
-    else
-	traps->bottom = left->y;
-
-    traps++;
-
-    *traps = *(traps - 1);
-    
-    if (right->y < left->y)
-    {
-	traps->top = right->y;
-	traps->bottom = left->y;
-	traps->right.p1 = *right;
-	traps->right.p2 = *left;
-    }
-    else
-    {
-	traps->top = left->y;
-	traps->bottom = right->y;
-	traps->left.p1 = *left;
-	traps->left.p2 = *right;
-    }
-}
-
-static pixman_trapezoid_t *
-convert_triangles (int n_tris, const pixman_triangle_t *tris)
-{
-    pixman_trapezoid_t *traps;
-    int i;
-
-    if (n_tris <= 0)
-	return NULL;
-    
-    traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
-    if (!traps)
-	return NULL;
-
-    for (i = 0; i < n_tris; ++i)
-	triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
-
-    return traps;
-}
-
-PIXMAN_EXPORT void
-pixman_composite_triangles (pixman_op_t			op,
-			    pixman_image_t *		src,
-			    pixman_image_t *		dst,
-			    pixman_format_code_t	mask_format,
-			    int				x_src,
-			    int				y_src,
-			    int				x_dst,
-			    int				y_dst,
-			    int				n_tris,
-			    const pixman_triangle_t *	tris)
-{
-    pixman_trapezoid_t *traps;
-
-    if ((traps = convert_triangles (n_tris, tris)))
-    {
-	pixman_composite_trapezoids (op, src, dst, mask_format,
-				     x_src, y_src, x_dst, y_dst,
-				     n_tris * 2, traps);
-	
-	free (traps);
-    }
-}
-
-PIXMAN_EXPORT void
-pixman_add_triangles (pixman_image_t          *image,
-		      int32_t	               x_off,
-		      int32_t	               y_off,
-		      int	               n_tris,
-		      const pixman_triangle_t *tris)
-{
-    pixman_trapezoid_t *traps;
-
-    if ((traps = convert_triangles (n_tris, tris)))
-    {
-	pixman_add_trapezoids (image, x_off, y_off,
-			       n_tris * 2, traps);
-
-	free (traps);
-    }
-}
diff --git a/vendor/pixman/pixman/pixman-utils.c b/vendor/pixman/pixman/pixman-utils.c
deleted file mode 100644
index 8c57b0bfa..000000000
--- a/vendor/pixman/pixman/pixman-utils.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 1999 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author:  Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "pixman-private.h"
-
-pixman_bool_t
-_pixman_multiply_overflows_size (size_t a, size_t b)
-{
-    return a >= SIZE_MAX / b;
-}
-
-pixman_bool_t
-_pixman_multiply_overflows_int (unsigned int a, unsigned int b)
-{
-    return a >= INT32_MAX / b;
-}
-
-pixman_bool_t
-_pixman_addition_overflows_int (unsigned int a, unsigned int b)
-{
-    return a > INT32_MAX - b;
-}
-
-void *
-pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c)
-{
-    if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c)
-	return NULL;
-
-    return malloc (a * b + c);
-}
-
-void *
-pixman_malloc_ab (unsigned int a,
-                  unsigned int b)
-{
-    if (a >= INT32_MAX / b)
-	return NULL;
-
-    return malloc (a * b);
-}
-
-void *
-pixman_malloc_abc (unsigned int a,
-                   unsigned int b,
-                   unsigned int c)
-{
-    if (a >= INT32_MAX / b)
-	return NULL;
-    else if (a * b >= INT32_MAX / c)
-	return NULL;
-    else
-	return malloc (a * b * c);
-}
-
-static force_inline uint16_t
-float_to_unorm (float f, int n_bits)
-{
-    uint32_t u;
-
-    if (f > 1.0)
-	f = 1.0;
-    if (f < 0.0)
-	f = 0.0;
-
-    u = f * (1 << n_bits);
-    u -= (u >> n_bits);
-
-    return u;
-}
-
-static force_inline float
-unorm_to_float (uint16_t u, int n_bits)
-{
-    uint32_t m = ((1 << n_bits) - 1);
-
-    return (u & m) * (1.f / (float)m);
-}
-
-/*
- * This function expands images from a8r8g8b8 to argb_t.  To preserve
- * precision, it needs to know from which source format the a8r8g8b8 pixels
- * originally came.
- *
- * For example, if the source was PIXMAN_x1r5g5b5 and the red component
- * contained bits 12345, then the 8-bit value is 12345123.  To correctly
- * expand this to floating point, it should be 12345 / 31.0 and not
- * 12345123 / 255.0.
- */
-void
-pixman_expand_to_float (argb_t               *dst,
-			const uint32_t       *src,
-			pixman_format_code_t  format,
-			int                   width)
-{
-    static const float multipliers[16] = {
-	0.0f,
-	1.0f / ((1 <<  1) - 1),
-	1.0f / ((1 <<  2) - 1),
-	1.0f / ((1 <<  3) - 1),
-	1.0f / ((1 <<  4) - 1),
-	1.0f / ((1 <<  5) - 1),
-	1.0f / ((1 <<  6) - 1),
-	1.0f / ((1 <<  7) - 1),
-	1.0f / ((1 <<  8) - 1),
-	1.0f / ((1 <<  9) - 1),
-	1.0f / ((1 << 10) - 1),
-	1.0f / ((1 << 11) - 1),
-	1.0f / ((1 << 12) - 1),
-	1.0f / ((1 << 13) - 1),
-	1.0f / ((1 << 14) - 1),
-	1.0f / ((1 << 15) - 1),
-    };
-    int a_size, r_size, g_size, b_size;
-    int a_shift, r_shift, g_shift, b_shift;
-    float a_mul, r_mul, g_mul, b_mul;
-    uint32_t a_mask, r_mask, g_mask, b_mask;
-    int i;
-
-    if (!PIXMAN_FORMAT_VIS (format))
-	format = PIXMAN_a8r8g8b8;
-
-    /*
-     * Determine the sizes of each component and the masks and shifts
-     * required to extract them from the source pixel.
-     */
-    a_size = PIXMAN_FORMAT_A (format);
-    r_size = PIXMAN_FORMAT_R (format);
-    g_size = PIXMAN_FORMAT_G (format);
-    b_size = PIXMAN_FORMAT_B (format);
-
-    a_shift = 32 - a_size;
-    r_shift = 24 - r_size;
-    g_shift = 16 - g_size;
-    b_shift =  8 - b_size;
-
-    a_mask = ((1 << a_size) - 1);
-    r_mask = ((1 << r_size) - 1);
-    g_mask = ((1 << g_size) - 1);
-    b_mask = ((1 << b_size) - 1);
-
-    a_mul = multipliers[a_size];
-    r_mul = multipliers[r_size];
-    g_mul = multipliers[g_size];
-    b_mul = multipliers[b_size];
-
-    /* Start at the end so that we can do the expansion in place
-     * when src == dst
-     */
-    for (i = width - 1; i >= 0; i--)
-    {
-	const uint32_t pixel = src[i];
-
-	dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f;
-	dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul;
-	dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul;
-	dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul;
-    }
-}
-
-uint16_t
-pixman_float_to_unorm (float f, int n_bits)
-{
-    return float_to_unorm (f, n_bits);
-}
-
-float
-pixman_unorm_to_float (uint16_t u, int n_bits)
-{
-    return unorm_to_float (u, n_bits);
-}
-
-void
-pixman_contract_from_float (uint32_t     *dst,
-			    const argb_t *src,
-			    int           width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i)
-    {
-	uint32_t a, r, g, b;
-
-	a = float_to_unorm (src[i].a, 8);
-	r = float_to_unorm (src[i].r, 8);
-	g = float_to_unorm (src[i].g, 8);
-	b = float_to_unorm (src[i].b, 8);
-
-	dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
-    }
-}
-
-uint32_t *
-_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
-{
-    return iter->buffer;
-}
-
-void
-_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-    pixman_image_t *image = iter->image;
-    uint8_t *b = (uint8_t *)image->bits.bits;
-    int s = image->bits.rowstride * 4;
-
-    iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8;
-    iter->stride = s;
-}
-
-#define N_TMP_BOXES (16)
-
-pixman_bool_t
-pixman_region16_copy_from_region32 (pixman_region16_t *dst,
-                                    pixman_region32_t *src)
-{
-    int n_boxes, i;
-    pixman_box32_t *boxes32;
-    pixman_box16_t *boxes16;
-    pixman_bool_t retval;
-
-    boxes32 = pixman_region32_rectangles (src, &n_boxes);
-
-    boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t));
-
-    if (!boxes16)
-	return FALSE;
-
-    for (i = 0; i < n_boxes; ++i)
-    {
-	boxes16[i].x1 = boxes32[i].x1;
-	boxes16[i].y1 = boxes32[i].y1;
-	boxes16[i].x2 = boxes32[i].x2;
-	boxes16[i].y2 = boxes32[i].y2;
-    }
-
-    pixman_region_fini (dst);
-    retval = pixman_region_init_rects (dst, boxes16, n_boxes);
-    free (boxes16);
-    return retval;
-}
-
-pixman_bool_t
-pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-                                    pixman_region16_t *src)
-{
-    int n_boxes, i;
-    pixman_box16_t *boxes16;
-    pixman_box32_t *boxes32;
-    pixman_box32_t tmp_boxes[N_TMP_BOXES];
-    pixman_bool_t retval;
-
-    boxes16 = pixman_region_rectangles (src, &n_boxes);
-
-    if (n_boxes > N_TMP_BOXES)
-	boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t));
-    else
-	boxes32 = tmp_boxes;
-
-    if (!boxes32)
-	return FALSE;
-
-    for (i = 0; i < n_boxes; ++i)
-    {
-	boxes32[i].x1 = boxes16[i].x1;
-	boxes32[i].y1 = boxes16[i].y1;
-	boxes32[i].x2 = boxes16[i].x2;
-	boxes32[i].y2 = boxes16[i].y2;
-    }
-
-    pixman_region32_fini (dst);
-    retval = pixman_region32_init_rects (dst, boxes32, n_boxes);
-
-    if (boxes32 != tmp_boxes)
-	free (boxes32);
-
-    return retval;
-}
-
-/* This function is exported for the sake of the test suite and not part
- * of the ABI.
- */
-PIXMAN_EXPORT pixman_implementation_t *
-_pixman_internal_only_get_implementation (void)
-{
-    return get_implementation ();
-}
-
-void
-_pixman_log_error (const char *function, const char *message)
-{
-    static int n_messages = 0;
-
-    if (n_messages < 10)
-    {
-	fprintf (stderr,
-		 "*** BUG ***\n"
-		 "In %s: %s\n"
-		 "Set a breakpoint on '_pixman_log_error' to debug\n\n",
-                 function, message);
-
-	n_messages++;
-    }
-}
diff --git a/vendor/pixman/pixman/pixman-version.h.in b/vendor/pixman/pixman/pixman-version.h.in
deleted file mode 100644
index 64778a595..000000000
--- a/vendor/pixman/pixman/pixman-version.h.in
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright © 2008 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Author: Carl D. Worth <cworth@cworth.org>
- */
-
-#ifndef PIXMAN_VERSION_H__
-#define PIXMAN_VERSION_H__
-
-#ifndef PIXMAN_H__
-#  error pixman-version.h should only be included by pixman.h
-#endif
-
-#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@
-#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@
-#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@
-
-#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@"
-
-#define PIXMAN_VERSION_ENCODE(major, minor, micro) (	\
-	  ((major) * 10000)				\
-	+ ((minor) *   100)				\
-	+ ((micro) *     1))
-
-#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE(	\
-	PIXMAN_VERSION_MAJOR,			\
-	PIXMAN_VERSION_MINOR,			\
-	PIXMAN_VERSION_MICRO)
-
-#ifndef PIXMAN_API
-# define PIXMAN_API
-#endif
-
-#endif /* PIXMAN_VERSION_H__ */
diff --git a/vendor/pixman/pixman/pixman-vmx.c b/vendor/pixman/pixman/pixman-vmx.c
deleted file mode 100644
index 1086b285d..000000000
--- a/vendor/pixman/pixman/pixman-vmx.c
+++ /dev/null
@@ -1,3159 +0,0 @@
-/*
- * Copyright © 2007 Luca Barbato
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Luca Barbato not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Luca Barbato makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Luca Barbato (lu_zero@gentoo.org)
- *
- * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-inlines.h"
-#include <altivec.h>
-
-#define AVV(x...) {x}
-
-static vector unsigned int mask_ff000000;
-static vector unsigned int mask_red;
-static vector unsigned int mask_green;
-static vector unsigned int mask_blue;
-static vector unsigned int mask_565_fix_rb;
-static vector unsigned int mask_565_fix_g;
-
-static force_inline vector unsigned int
-splat_alpha (vector unsigned int pix)
-{
-#ifdef WORDS_BIGENDIAN
-    return vec_perm (pix, pix,
-		     (vector unsigned char)AVV (
-			 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
-			 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
-#else
-    return vec_perm (pix, pix,
-		     (vector unsigned char)AVV (
-			 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07,
-			 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F));
-#endif
-}
-
-static force_inline vector unsigned int
-splat_pixel (vector unsigned int pix)
-{
-    return vec_perm (pix, pix,
-		     (vector unsigned char)AVV (
-			 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
-			 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03));
-}
-
-static force_inline vector unsigned int
-pix_multiply (vector unsigned int p, vector unsigned int a)
-{
-    vector unsigned short hi, lo, mod;
-
-    /* unpack to short */
-    hi = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergeh ((vector unsigned char)AVV (0),
-		    (vector unsigned char)p);
-#else
-	vec_mergeh ((vector unsigned char) p,
-		    (vector unsigned char) AVV (0));
-#endif
-
-    mod = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergeh ((vector unsigned char)AVV (0),
-		    (vector unsigned char)a);
-#else
-	vec_mergeh ((vector unsigned char) a,
-		    (vector unsigned char) AVV (0));
-#endif
-
-    hi = vec_mladd (hi, mod, (vector unsigned short)
-                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
-                         0x0080, 0x0080, 0x0080, 0x0080));
-
-    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
-
-    hi = vec_sr (hi, vec_splat_u16 (8));
-
-    /* unpack to short */
-    lo = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergel ((vector unsigned char)AVV (0),
-		    (vector unsigned char)p);
-#else
-	vec_mergel ((vector unsigned char) p,
-		    (vector unsigned char) AVV (0));
-#endif
-
-    mod = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergel ((vector unsigned char)AVV (0),
-		    (vector unsigned char)a);
-#else
-	vec_mergel ((vector unsigned char) a,
-		    (vector unsigned char) AVV (0));
-#endif
-
-    lo = vec_mladd (lo, mod, (vector unsigned short)
-                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
-                         0x0080, 0x0080, 0x0080, 0x0080));
-
-    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
-
-    lo = vec_sr (lo, vec_splat_u16 (8));
-
-    return (vector unsigned int)vec_packsu (hi, lo);
-}
-
-static force_inline vector unsigned int
-pix_add (vector unsigned int a, vector unsigned int b)
-{
-    return (vector unsigned int)vec_adds ((vector unsigned char)a,
-                                          (vector unsigned char)b);
-}
-
-static force_inline vector unsigned int
-pix_add_mul (vector unsigned int x,
-             vector unsigned int a,
-             vector unsigned int y,
-             vector unsigned int b)
-{
-    vector unsigned int t1, t2;
-
-    t1 = pix_multiply (x, a);
-    t2 = pix_multiply (y, b);
-
-    return pix_add (t1, t2);
-}
-
-static force_inline vector unsigned int
-negate (vector unsigned int src)
-{
-    return vec_nor (src, src);
-}
-
-/* dest*~srca + src */
-static force_inline vector unsigned int
-over (vector unsigned int src,
-      vector unsigned int srca,
-      vector unsigned int dest)
-{
-    vector unsigned char tmp = (vector unsigned char)
-	pix_multiply (dest, negate (srca));
-
-    tmp = vec_adds ((vector unsigned char)src, tmp);
-    return (vector unsigned int)tmp;
-}
-
-/* in == pix_multiply */
-#define in_over(src, srca, mask, dest)					\
-    over (pix_multiply (src, mask),					\
-          pix_multiply (srca, mask), dest)
-
-#ifdef WORDS_BIGENDIAN
-
-#define COMPUTE_SHIFT_MASK(source)					\
-    source ## _mask = vec_lvsl (0, source);
-
-#define COMPUTE_SHIFT_MASKS(dest, source)				\
-    source ## _mask = vec_lvsl (0, source);
-
-#define COMPUTE_SHIFT_MASKC(dest, source, mask)				\
-    mask ## _mask = vec_lvsl (0, mask);					\
-    source ## _mask = vec_lvsl (0, source);
-
-#define LOAD_VECTOR(source)				  \
-do							  \
-{							  \
-    vector unsigned char tmp1, tmp2;			  \
-    tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
-    tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
-    v ## source = (typeof(v ## source)) 		  \
-	vec_perm (tmp1, tmp2, source ## _mask);		  \
-} while (0)
-
-#define LOAD_VECTORS(dest, source)			  \
-do							  \
-{							  \
-    LOAD_VECTOR(source);				  \
-    v ## dest = (typeof(v ## dest))vec_ld (0, dest);	  \
-} while (0)
-
-#define LOAD_VECTORSC(dest, source, mask)		  \
-do							  \
-{							  \
-    LOAD_VECTORS(dest, source); 			  \
-    LOAD_VECTOR(mask);					  \
-} while (0)
-
-#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
-#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask
-
-#else
-
-/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op.
- * They are defined that way because little endian altivec can do unaligned
- * reads natively and have no need for constructing the permutation pattern
- * variables.
- */
-#define COMPUTE_SHIFT_MASK(source)
-
-#define COMPUTE_SHIFT_MASKS(dest, source)
-
-#define COMPUTE_SHIFT_MASKC(dest, source, mask)
-
-# define LOAD_VECTOR(source)				\
-    v ## source = (typeof(v ## source))vec_xl(0, source);
-
-# define LOAD_VECTORS(dest, source)			\
-    LOAD_VECTOR(source);				\
-    LOAD_VECTOR(dest);					\
-
-# define LOAD_VECTORSC(dest, source, mask)		\
-    LOAD_VECTORS(dest, source); 			\
-    LOAD_VECTOR(mask);					\
-
-#define DECLARE_SRC_MASK_VAR
-#define DECLARE_MASK_MASK_VAR
-
-#endif /* WORDS_BIGENDIAN */
-
-#define LOAD_VECTORSM(dest, source, mask)				\
-    LOAD_VECTORSC (dest, source, mask); 				\
-    v ## source = pix_multiply (v ## source,				\
-                                splat_alpha (v ## mask));
-
-#define STORE_VECTOR(dest)						\
-    vec_st ((vector unsigned int) v ## dest, 0, dest);
-
-/* load 4 pixels from a 16-byte boundary aligned address */
-static force_inline vector unsigned int
-load_128_aligned (const uint32_t* src)
-{
-    return *((vector unsigned int *) src);
-}
-
-/* load 4 pixels from a unaligned address */
-static force_inline vector unsigned int
-load_128_unaligned (const uint32_t* src)
-{
-    vector unsigned int vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    COMPUTE_SHIFT_MASK (src);
-    LOAD_VECTOR (src);
-
-    return vsrc;
-}
-
-/* save 4 pixels on a 16-byte boundary aligned address */
-static force_inline void
-save_128_aligned (uint32_t* data,
-		  vector unsigned int vdata)
-{
-    STORE_VECTOR(data)
-}
-
-static force_inline vector unsigned int
-create_mask_1x32_128 (const uint32_t *src)
-{
-    vector unsigned int vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    COMPUTE_SHIFT_MASK (src);
-    LOAD_VECTOR (src);
-    return vec_splat(vsrc, 0);
-}
-
-static force_inline vector unsigned int
-create_mask_32_128 (uint32_t mask)
-{
-    return create_mask_1x32_128(&mask);
-}
-
-static force_inline vector unsigned int
-unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
-{
-    vector unsigned char lo;
-
-    /* unpack to short */
-    lo = (vector unsigned char)
-#ifdef WORDS_BIGENDIAN
-	vec_mergel ((vector unsigned char) data2,
-		    (vector unsigned char) data1);
-#else
-	vec_mergel ((vector unsigned char) data1,
-		    (vector unsigned char) data2);
-#endif
-
-    return (vector unsigned int) lo;
-}
-
-static force_inline vector unsigned int
-unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
-{
-    vector unsigned char hi;
-
-    /* unpack to short */
-    hi = (vector unsigned char)
-#ifdef WORDS_BIGENDIAN
-	vec_mergeh ((vector unsigned char) data2,
-		    (vector unsigned char) data1);
-#else
-	vec_mergeh ((vector unsigned char) data1,
-		    (vector unsigned char) data2);
-#endif
-
-    return (vector unsigned int) hi;
-}
-
-static force_inline vector unsigned int
-unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
-{
-    vector unsigned short lo;
-
-    /* unpack to char */
-    lo = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergel ((vector unsigned short) data2,
-		    (vector unsigned short) data1);
-#else
-	vec_mergel ((vector unsigned short) data1,
-		    (vector unsigned short) data2);
-#endif
-
-    return (vector unsigned int) lo;
-}
-
-static force_inline vector unsigned int
-unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
-{
-    vector unsigned short hi;
-
-    /* unpack to char */
-    hi = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
-	vec_mergeh ((vector unsigned short) data2,
-		    (vector unsigned short) data1);
-#else
-	vec_mergeh ((vector unsigned short) data1,
-		    (vector unsigned short) data2);
-#endif
-
-    return (vector unsigned int) hi;
-}
-
-static force_inline void
-unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
-		    vector unsigned int* data_lo, vector unsigned int* data_hi)
-{
-    *data_lo = unpacklo_128_16x8(data1, data2);
-    *data_hi = unpackhi_128_16x8(data1, data2);
-}
-
-static force_inline void
-unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
-		    vector unsigned int* data_lo, vector unsigned int* data_hi)
-{
-    *data_lo = unpacklo_128_8x16(data1, data2);
-    *data_hi = unpackhi_128_8x16(data1, data2);
-}
-
-static force_inline vector unsigned int
-unpack_565_to_8888 (vector unsigned int lo)
-{
-    vector unsigned int r, g, b, rb, t;
-
-    r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
-    g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
-    b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
-
-    rb = vec_or (r, b);
-    t  = vec_and (rb, mask_565_fix_rb);
-    t  = vec_sr (t, create_mask_32_128(5));
-    rb = vec_or (rb, t);
-
-    t  = vec_and (g, mask_565_fix_g);
-    t  = vec_sr (t, create_mask_32_128(6));
-    g  = vec_or (g, t);
-
-    return vec_or (rb, g);
-}
-
-static force_inline int
-is_opaque (vector unsigned int x)
-{
-    uint32_t cmp_result;
-    vector bool int ffs = vec_cmpeq(x, x);
-
-    cmp_result = vec_all_eq(x, ffs);
-
-    return (cmp_result & 0x8888) == 0x8888;
-}
-
-static force_inline int
-is_zero (vector unsigned int x)
-{
-    uint32_t cmp_result;
-
-    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
-
-    return cmp_result == 0xffff;
-}
-
-static force_inline int
-is_transparent (vector unsigned int x)
-{
-    uint32_t cmp_result;
-
-    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
-    return (cmp_result & 0x8888) == 0x8888;
-}
-
-static force_inline uint32_t
-core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
-{
-    uint32_t a;
-
-    a = ALPHA_8(src);
-
-    if (a == 0xff)
-    {
-	return src;
-    }
-    else if (src)
-    {
-	UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src);
-    }
-
-    return dst;
-}
-
-static force_inline uint32_t
-combine1 (const uint32_t *ps, const uint32_t *pm)
-{
-    uint32_t s = *ps;
-
-    if (pm)
-	UN8x4_MUL_UN8(s, ALPHA_8(*pm));
-
-    return s;
-}
-
-static force_inline vector unsigned int
-combine4 (const uint32_t* ps, const uint32_t* pm)
-{
-    vector unsigned int src, msk;
-
-    if (pm)
-    {
-	msk = load_128_unaligned(pm);
-
-	if (is_transparent(msk))
-	    return (vector unsigned int) AVV(0);
-    }
-
-    src = load_128_unaligned(ps);
-
-    if (pm)
-	src = pix_multiply(src, msk);
-
-    return src;
-}
-
-static void
-vmx_combine_over_u_no_mask (uint32_t *      dest,
-                            const uint32_t *src,
-                            int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-
-	LOAD_VECTORS (dest, src);
-
-	vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_over_u_mask (uint32_t *      dest,
-                         const uint32_t *src,
-                         const uint32_t *mask,
-                         int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t ia;
-
-	UN8x4_MUL_UN8 (s, m);
-
-	ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t ia;
-
-	UN8x4_MUL_UN8 (s, m);
-
-	ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_over_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    if (mask)
-	vmx_combine_over_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_over_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
-                                    const uint32_t *src,
-                                    int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-
-	LOAD_VECTORS (dest, src);
-
-	vdest = over (vdest, splat_alpha (vdest), vsrc);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t ia = ALPHA_8 (~dest[i]);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_over_reverse_u_mask (uint32_t *      dest,
-                                 const uint32_t *src,
-                                 const uint32_t *mask,
-                                 int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = over (vdest, splat_alpha (vdest), vsrc);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t ia = ALPHA_8 (~dest[i]);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_over_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    if (mask)
-	vmx_combine_over_reverse_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_over_reverse_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_in_u_no_mask (uint32_t *      dest,
-                          const uint32_t *src,
-                          int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t a = ALPHA_8 (*dest);
-
-	UN8x4_MUL_UN8 (s, a);
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t a = ALPHA_8 (dest[i]);
-
-	UN8x4_MUL_UN8 (s, a);
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_in_u_mask (uint32_t *      dest,
-                       const uint32_t *src,
-                       const uint32_t *mask,
-                       int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t a = ALPHA_8 (*dest);
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_MUL_UN8 (s, a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t a = ALPHA_8 (dest[i]);
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_MUL_UN8 (s, a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_in_u (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  uint32_t *               dest,
-                  const uint32_t *         src,
-                  const uint32_t *         mask,
-                  int                      width)
-{
-    if (mask)
-	vmx_combine_in_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_in_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
-                                  const uint32_t *src,
-                                  int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t d = *dest;
-	uint32_t a = ALPHA_8 (*src++);
-
-	UN8x4_MUL_UN8 (d, a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t d = dest[i];
-	uint32_t a = ALPHA_8 (src[i]);
-
-	UN8x4_MUL_UN8 (d, a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_in_reverse_u_mask (uint32_t *      dest,
-                               const uint32_t *src,
-                               const uint32_t *mask,
-                               int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t d = *dest;
-	uint32_t a = *src++;
-
-	UN8x4_MUL_UN8 (a, m);
-	a = ALPHA_8 (a);
-	UN8x4_MUL_UN8 (d, a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t d = dest[i];
-	uint32_t a = src[i];
-
-	UN8x4_MUL_UN8 (a, m);
-	a = ALPHA_8 (a);
-	UN8x4_MUL_UN8 (d, a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_in_reverse_u (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *               dest,
-                          const uint32_t *         src,
-                          const uint32_t *         mask,
-                          int                      width)
-{
-    if (mask)
-	vmx_combine_in_reverse_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_in_reverse_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_out_u_no_mask (uint32_t *      dest,
-                           const uint32_t *src,
-                           int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t a = ALPHA_8 (~(*dest));
-
-	UN8x4_MUL_UN8 (s, a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t a = ALPHA_8 (~dest[i]);
-
-	UN8x4_MUL_UN8 (s, a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_out_u_mask (uint32_t *      dest,
-                        const uint32_t *src,
-                        const uint32_t *mask,
-                        int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t a = ALPHA_8 (~(*dest));
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_MUL_UN8 (s, a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t a = ALPHA_8 (~dest[i]);
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_MUL_UN8 (s, a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_out_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    if (mask)
-	vmx_combine_out_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_out_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
-                                   const uint32_t *src,
-                                   int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t d = *dest;
-	uint32_t a = ALPHA_8 (~(*src++));
-
-	UN8x4_MUL_UN8 (d, a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t d = dest[i];
-	uint32_t a = ALPHA_8 (~src[i]);
-
-	UN8x4_MUL_UN8 (d, a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_out_reverse_u_mask (uint32_t *      dest,
-                                const uint32_t *src,
-                                const uint32_t *mask,
-                                int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t d = *dest;
-	uint32_t a = *src++;
-
-	UN8x4_MUL_UN8 (a, m);
-	a = ALPHA_8 (~a);
-	UN8x4_MUL_UN8 (d, a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t d = dest[i];
-	uint32_t a = src[i];
-
-	UN8x4_MUL_UN8 (a, m);
-	a = ALPHA_8 (~a);
-	UN8x4_MUL_UN8 (d, a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_out_reverse_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    if (mask)
-	vmx_combine_out_reverse_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_out_reverse_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_atop_u_no_mask (uint32_t *      dest,
-                            const uint32_t *src,
-                            int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
-			     vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_atop_u_mask (uint32_t *      dest,
-                         const uint32_t *src,
-                         const uint32_t *mask,
-                         int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia;
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
-			     vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t dest_a = ALPHA_8 (d);
-	uint32_t src_ia;
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_atop_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    if (mask)
-	vmx_combine_atop_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_atop_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
-                                    const uint32_t *src,
-                                    int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t src_a = ALPHA_8 (s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
-			     vsrc, splat_alpha (negate (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t src_a = ALPHA_8 (s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
-                                 const uint32_t *src,
-                                 const uint32_t *mask,
-                                 int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t src_a;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_a = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
-			     vsrc, splat_alpha (negate (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t src_a;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_a = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    if (mask)
-	vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_atop_reverse_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_xor_u_no_mask (uint32_t *      dest,
-                           const uint32_t *src,
-                           int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
-			     vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t src_ia = ALPHA_8 (~s);
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_xor_u_mask (uint32_t *      dest,
-                        const uint32_t *src,
-                        const uint32_t *mask,
-                        int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t src_ia;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
-			     vdest, splat_alpha (negate (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t src_ia;
-	uint32_t dest_ia = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8 (s, m);
-
-	src_ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_xor_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    if (mask)
-	vmx_combine_xor_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_xor_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_add_u_no_mask (uint32_t *      dest,
-                           const uint32_t *src,
-                           int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc;
-    DECLARE_SRC_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-
-	UN8x4_ADD_UN8x4 (d, s);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKS (dest, src);
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORS (dest, src);
-
-	vdest = pix_add (vsrc, vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-
-	UN8x4_ADD_UN8x4 (d, s);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_add_u_mask (uint32_t *      dest,
-                        const uint32_t *src,
-                        const uint32_t *mask,
-                        int             width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t m = ALPHA_8 (*mask++);
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_ADD_UN8x4 (d, s);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSM (dest, src, mask);
-
-	vdest = pix_add (vsrc, vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t m = ALPHA_8 (mask[i]);
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-
-	UN8x4_MUL_UN8 (s, m);
-	UN8x4_ADD_UN8x4 (d, s);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_add_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    if (mask)
-	vmx_combine_add_u_mask (dest, src, mask, width);
-    else
-	vmx_combine_add_u_no_mask (dest, src, width);
-}
-
-static void
-vmx_combine_src_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-
-	UN8x4_MUL_UN8x4 (s, a);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_multiply (vsrc, vmask);
-
-	STORE_VECTOR (dest);
-
-	mask += 4;
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-
-	UN8x4_MUL_UN8x4 (s, a);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_over_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
-
-	STORE_VECTOR (dest);
-
-	mask += 4;
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t ida = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
-
-	STORE_VECTOR (dest);
-
-	mask += 4;
-	src += 4;
-	dest += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t ida = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_in_ca (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t da = ALPHA_8 (*dest);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (s, da);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t da = ALPHA_8 (dest[i]);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (s, da);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (*src++);
-
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4 (d, a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (src[i]);
-
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4 (d, a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_out_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (s, da);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_multiply (
-	    pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (s, da);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4 (d, ~a);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_multiply (
-	    vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (s);
-
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4 (d, ~a);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_atop_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask, vsrca;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vsrca = splat_alpha (vsrc);
-
-	vsrc = pix_multiply (vsrc, vmask);
-	vmask = pix_multiply (vmask, vsrca);
-
-	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
-			     negate (vmask), vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_add_mul (vdest,
-			     pix_multiply (vmask, splat_alpha (vsrc)),
-			     pix_multiply (vsrc, vmask),
-			     negate (splat_alpha (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_xor_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
-
-	*dest++ = d;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_add_mul (vdest,
-			     negate (pix_multiply (vmask, splat_alpha (vsrc))),
-			     pix_multiply (vsrc, vmask),
-			     negate (splat_alpha (vdest)));
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-	uint32_t sa = ALPHA_8 (s);
-	uint32_t da = ALPHA_8 (~d);
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_MUL_UN8 (a, sa);
-	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
-
-	dest[i] = d;
-    }
-}
-
-static void
-vmx_combine_add_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    int i;
-    vector unsigned int vdest, vsrc, vmask;
-    DECLARE_SRC_MASK_VAR;
-    DECLARE_MASK_MASK_VAR;
-
-    while (width && ((uintptr_t)dest & 15))
-    {
-	uint32_t a = *mask++;
-	uint32_t s = *src++;
-	uint32_t d = *dest;
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_ADD_UN8x4 (s, d);
-
-	*dest++ = s;
-	width--;
-    }
-
-    COMPUTE_SHIFT_MASKC (dest, src, mask);
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width / 4; i > 0; i--)
-    {
-	LOAD_VECTORSC (dest, src, mask);
-
-	vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
-
-	STORE_VECTOR (dest);
-
-	src += 4;
-	dest += 4;
-	mask += 4;
-    }
-
-    for (i = width % 4; --i >= 0;)
-    {
-	uint32_t a = mask[i];
-	uint32_t s = src[i];
-	uint32_t d = dest[i];
-
-	UN8x4_MUL_UN8x4 (s, a);
-	UN8x4_ADD_UN8x4 (s, d);
-
-	dest[i] = s;
-    }
-}
-
-static void
-vmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t *mask_line;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t m, d, s, ia;
-
-    vector unsigned int vsrc, valpha, vmask, vdst;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    srca = ALPHA_8(src);
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = (vector unsigned int) {src, src, src, src};
-    valpha = splat_alpha(vsrc);
-
-    while (height--)
-    {
-	const uint8_t *pm = mask_line;
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    s = src;
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *dst;
-		UN8x4_MUL_UN8 (s, m);
-		ia = ALPHA_8 (~s);
-		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		*dst = d;
-	    }
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-	    m = *((uint32_t*)pm);
-
-	    if (srca == 0xff && m == 0xffffffff)
-	    {
-		save_128_aligned(dst, vsrc);
-	    }
-	    else if (m)
-	    {
-		vmask = splat_pixel((vector unsigned int) {m, m, m, m});
-
-		/* dst is 16-byte aligned */
-		vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst));
-
-		save_128_aligned(dst, vdst);
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    pm += 4;
-	}
-
-	while (w)
-	{
-	    s = src;
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *dst;
-		UN8x4_MUL_UN8 (s, m);
-		ia = ALPHA_8 (~s);
-		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-		*dst = d;
-	    }
-
-	    w--;
-	    dst++;
-	}
-    }
-
-}
-
-static pixman_bool_t
-vmx_fill (pixman_implementation_t *imp,
-           uint32_t *               bits,
-           int                      stride,
-           int                      bpp,
-           int                      x,
-           int                      y,
-           int                      width,
-           int                      height,
-           uint32_t		    filler)
-{
-    uint32_t byte_width;
-    uint8_t *byte_line;
-
-    vector unsigned int vfiller;
-
-    if (bpp == 8)
-    {
-	uint8_t b;
-	uint16_t w;
-
-	stride = stride * (int) sizeof (uint32_t) / 1;
-	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
-	byte_width = width;
-	stride *= 1;
-
-	b = filler & 0xff;
-	w = (b << 8) | b;
-	filler = (w << 16) | w;
-    }
-    else if (bpp == 16)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 2;
-	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-	byte_width = 2 * width;
-	stride *= 2;
-
-        filler = (filler & 0xffff) * 0x00010001;
-    }
-    else if (bpp == 32)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 4;
-	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-	byte_width = 4 * width;
-	stride *= 4;
-    }
-    else
-    {
-	return FALSE;
-    }
-
-    vfiller = create_mask_1x32_128(&filler);
-
-    while (height--)
-    {
-	int w;
-	uint8_t *d = byte_line;
-	byte_line += stride;
-	w = byte_width;
-
-	if (w >= 1 && ((uintptr_t)d & 1))
-	{
-	    *(uint8_t *)d = filler;
-	    w -= 1;
-	    d += 1;
-	}
-
-	while (w >= 2 && ((uintptr_t)d & 3))
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((uintptr_t)d & 15))
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	while (w >= 128)
-	{
-	    vec_st(vfiller, 0, (uint32_t *) d);
-	    vec_st(vfiller, 0, (uint32_t *) d + 4);
-	    vec_st(vfiller, 0, (uint32_t *) d + 8);
-	    vec_st(vfiller, 0, (uint32_t *) d + 12);
-	    vec_st(vfiller, 0, (uint32_t *) d + 16);
-	    vec_st(vfiller, 0, (uint32_t *) d + 20);
-	    vec_st(vfiller, 0, (uint32_t *) d + 24);
-	    vec_st(vfiller, 0, (uint32_t *) d + 28);
-
-	    d += 128;
-	    w -= 128;
-	}
-
-	if (w >= 64)
-	{
-	    vec_st(vfiller, 0, (uint32_t *) d);
-	    vec_st(vfiller, 0, (uint32_t *) d + 4);
-	    vec_st(vfiller, 0, (uint32_t *) d + 8);
-	    vec_st(vfiller, 0, (uint32_t *) d + 12);
-
-	    d += 64;
-	    w -= 64;
-	}
-
-	if (w >= 32)
-	{
-	    vec_st(vfiller, 0, (uint32_t *) d);
-	    vec_st(vfiller, 0, (uint32_t *) d + 4);
-
-	    d += 32;
-	    w -= 32;
-	}
-
-	if (w >= 16)
-	{
-	    vec_st(vfiller, 0, (uint32_t *) d);
-
-	    d += 16;
-	    w -= 16;
-	}
-
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = filler;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	if (w >= 2)
-	{
-	    *(uint16_t *)d = filler;
-	    w -= 2;
-	    d += 2;
-	}
-
-	if (w >= 1)
-	{
-	    *(uint8_t *)d = filler;
-	    w -= 1;
-	    d += 1;
-	}
-    }
-
-    return TRUE;
-}
-
-static void
-vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
-			      pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int32_t w;
-    int dst_stride, src_stride;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (uintptr_t)dst & 15)
-	{
-	    *dst++ = *src++ | 0xff000000;
-	    w--;
-	}
-
-	while (w >= 16)
-	{
-	    vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;
-
-	    vmx_src1 = load_128_unaligned (src);
-	    vmx_src2 = load_128_unaligned (src + 4);
-	    vmx_src3 = load_128_unaligned (src + 8);
-	    vmx_src4 = load_128_unaligned (src + 12);
-
-	    save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
-	    save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
-	    save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
-	    save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));
-
-	    dst += 16;
-	    src += 16;
-	    w -= 16;
-	}
-
-	while (w)
-	{
-	    *dst++ = *src++ | 0xff000000;
-	    w--;
-	}
-    }
-}
-
-static void
-vmx_composite_over_n_8888 (pixman_implementation_t *imp,
-                           pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t *dst_line, *dst;
-    uint32_t src, ia;
-    int      i, w, dst_stride;
-    vector unsigned int vdst, vsrc, via;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    vsrc = (vector unsigned int){src, src, src, src};
-    via = negate (splat_alpha (vsrc));
-    ia = ALPHA_8 (~src);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	while (w && ((uintptr_t)dst & 15))
-	{
-	    uint32_t d = *dst;
-	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
-	    *dst++ = d;
-	    w--;
-	}
-
-	for (i = w / 4; i > 0; i--)
-	{
-	    vdst = pix_multiply (load_128_aligned (dst), via);
-	    save_128_aligned (dst, pix_add (vsrc, vdst));
-	    dst += 4;
-	}
-
-	for (i = w % 4; --i >= 0;)
-	{
-	    uint32_t d = dst[i];
-	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
-	    dst[i] = d;
-	}
-    }
-}
-
-static void
-vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
-                               pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    int dst_stride, src_stride;
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-
-    PIXMAN_IMAGE_GET_LINE (
-    dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-    src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    dst = dst_line;
-    src = src_line;
-
-    while (height--)
-    {
-        vmx_combine_over_u (imp, op, dst, src, NULL, width);
-
-        dst += dst_stride;
-        src += src_stride;
-    }
-}
-
-static void
-vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-                                    pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t src, ia;
-    uint32_t    *dst_line, d;
-    uint32_t    *mask_line, m;
-    uint32_t pack_cmp;
-    int dst_stride, mask_stride;
-
-    vector unsigned int vsrc, valpha, vmask, vdest;
-
-    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    vsrc = (vector unsigned int) {src, src, src, src};
-    valpha = splat_alpha(vsrc);
-    ia = ALPHA_8 (src);
-
-    while (height--)
-    {
-	int w = width;
-	const uint32_t *pm = (uint32_t *)mask_line;
-	uint32_t *pd = (uint32_t *)dst_line;
-	uint32_t s;
-
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-
-	while (w && (uintptr_t)pd & 15)
-	{
-	    s = src;
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-		UN8x4_MUL_UN8x4 (s, m);
-		UN8x4_MUL_UN8 (m, ia);
-		m = ~m;
-		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
-		*pd = d;
-	    }
-
-	    pd++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    /* pm is NOT necessarily 16-byte aligned */
-	    vmask = load_128_unaligned (pm);
-
-	    pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0));
-
-	    /* if all bits in mask are zero, pack_cmp is not 0 */
-	    if (pack_cmp == 0)
-	    {
-		/* pd is 16-byte aligned */
-		vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd));
-
-		save_128_aligned(pd, vdest);
-	    }
-
-	    pd += 4;
-	    pm += 4;
-	    w -= 4;
-	}
-
-	while (w)
-	{
-	    s = src;
-	    m = *pm++;
-
-	    if (m)
-	    {
-		d = *pd;
-		UN8x4_MUL_UN8x4 (s, m);
-		UN8x4_MUL_UN8 (m, ia);
-		m = ~m;
-		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
-		*pd = d;
-	    }
-
-	    pd++;
-	    w--;
-	}
-    }
-}
-
-static void
-vmx_composite_add_8_8 (pixman_implementation_t *imp,
-            pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint16_t t;
-
-    PIXMAN_IMAGE_GET_LINE (
-    src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-    dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	src = src_line;
-
-	dst_line += dst_stride;
-	src_line += src_stride;
-	w = width;
-
-	/* Small head */
-	while (w && (uintptr_t)dst & 3)
-	{
-	    t = (*dst) + (*src++);
-	    *dst++ = t | (0 - (t >> 8));
-	    w--;
-	}
-
-	vmx_combine_add_u (imp, op,
-		    (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
-
-	/* Small tail */
-	dst += w & 0xfffc;
-	src += w & 0xfffc;
-
-	w &= 3;
-
-	while (w)
-	{
-	    t = (*dst) + (*src++);
-	    *dst++ = t | (0 - (t >> 8));
-	    w--;
-	}
-    }
-}
-
-static void
-vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-
-    PIXMAN_IMAGE_GET_LINE (
-	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-
-	vmx_combine_add_u (imp, op, dst, src, NULL, width);
-    }
-}
-
-static force_inline void
-scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t*       pd,
-                                            const uint32_t* ps,
-                                            int32_t         w,
-                                            pixman_fixed_t  vx,
-                                            pixman_fixed_t  unit_x,
-                                            pixman_fixed_t  src_width_fixed,
-                                            pixman_bool_t   fully_transparent_src)
-{
-    uint32_t s, d;
-    const uint32_t* pm = NULL;
-
-    vector unsigned int vsrc, vdst;
-
-    if (fully_transparent_src)
-	return;
-
-    /* Align dst on a 16-byte boundary */
-    while (w && ((uintptr_t)pd & 15))
-    {
-	d = *pd;
-	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	*pd++ = core_combine_over_u_pixel_vmx (s, d);
-	if (pm)
-	    pm++;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	vector unsigned int tmp;
-	uint32_t tmp1, tmp2, tmp3, tmp4;
-
-	tmp1 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp2 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp3 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-	tmp4 = *(ps + pixman_fixed_to_int (vx));
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	tmp[0] = tmp1;
-	tmp[1] = tmp2;
-	tmp[2] = tmp3;
-	tmp[3] = tmp4;
-
-	vsrc = combine4 ((const uint32_t *) &tmp, pm);
-
-	if (is_opaque (vsrc))
-	{
-	    save_128_aligned (pd, vsrc);
-	}
-	else if (!is_zero (vsrc))
-	{
-	    vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd));
-
-	    save_128_aligned (pd, vdst);
-	}
-
-	w -= 4;
-	pd += 4;
-	if (pm)
-	    pm += 4;
-    }
-
-    while (w)
-    {
-	d = *pd;
-	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
-	vx += unit_x;
-	while (vx >= 0)
-	    vx -= src_width_fixed;
-
-	*pd++ = core_combine_over_u_pixel_vmx (s, d);
-	if (pm)
-	    pm++;
-
-	w--;
-    }
-}
-
-FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
-		       scaled_nearest_scanline_vmx_8888_8888_OVER,
-		       uint32_t, uint32_t, COVER)
-FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
-		       scaled_nearest_scanline_vmx_8888_8888_OVER,
-		       uint32_t, uint32_t, NONE)
-FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
-		       scaled_nearest_scanline_vmx_8888_8888_OVER,
-		       uint32_t, uint32_t, PAD)
-FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
-		       scaled_nearest_scanline_vmx_8888_8888_OVER,
-		       uint32_t, uint32_t, NORMAL)
-
-static const pixman_fast_path_t vmx_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null, a8r8g8b8, vmx_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid,    null, x8r8g8b8, vmx_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
-
-    /* PIXMAN_OP_ADD */
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
-
-    /* PIXMAN_OP_SRC */
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
-
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
-    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
-
-    {   PIXMAN_OP_NONE	},
-};
-
-static uint32_t *
-vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    vector unsigned int ff000000 = mask_ff000000;
-    uint32_t *dst = iter->buffer;
-    uint32_t *src = (uint32_t *)iter->bits;
-
-    iter->bits += iter->stride;
-
-    while (w && ((uintptr_t)dst) & 0x0f)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    while (w >= 4)
-    {
-	save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));
-
-	dst += 4;
-	src += 4;
-	w -= 4;
-    }
-
-    while (w)
-    {
-	*dst++ = (*src++) | 0xff000000;
-	w--;
-    }
-
-    return iter->buffer;
-}
-
-static uint32_t *
-vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
-{
-    int w = iter->width;
-    uint32_t *dst = iter->buffer;
-    uint8_t *src = iter->bits;
-    vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
-
-    iter->bits += iter->stride;
-
-    while (w && (((uintptr_t)dst) & 15))
-    {
-        *dst++ = *(src++) << 24;
-        w--;
-    }
-
-    while (w >= 16)
-    {
-	vmx0 = load_128_unaligned((uint32_t *) src);
-
-	unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
-	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
-	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
-
-	save_128_aligned(dst, vmx6);
-	save_128_aligned((dst +  4), vmx5);
-	save_128_aligned((dst +  8), vmx4);
-	save_128_aligned((dst + 12), vmx3);
-
-	dst += 16;
-	src += 16;
-	w -= 16;
-    }
-
-    while (w)
-    {
-	*dst++ = *(src++) << 24;
-	w--;
-    }
-
-    return iter->buffer;
-}
-
-#define IMAGE_FLAGS							\
-    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
-     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
-static const pixman_iter_info_t vmx_iters[] =
-{
-    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
-    },
-    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
-      _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
-    },
-    { PIXMAN_null },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
-
-    /* VMX constants */
-    mask_ff000000 = create_mask_32_128 (0xff000000);
-    mask_red   = create_mask_32_128 (0x00f80000);
-    mask_green = create_mask_32_128 (0x0000fc00);
-    mask_blue  = create_mask_32_128 (0x000000f8);
-    mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
-    mask_565_fix_g = create_mask_32_128  (0x0000c000);
-
-    /* Set up function pointers */
-
-    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
-
-    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;
-
-    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
-
-    imp->fill = vmx_fill;
-
-    imp->iter_info = vmx_iters;
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman-x86.c b/vendor/pixman/pixman/pixman-x86.c
deleted file mode 100644
index 7f4d80e94..000000000
--- a/vendor/pixman/pixman/pixman-x86.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-
-#include "pixman-private.h"
-
-#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)
-
-/* The CPU detection code needs to be in a file not compiled with
- * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
- * that would lead to SIGILL instructions on old CPUs that don't have
- * it.
- */
-
-typedef enum
-{
-    X86_MMX			= (1 << 0),
-    X86_MMX_EXTENSIONS		= (1 << 1),
-    X86_SSE			= (1 << 2) | X86_MMX_EXTENSIONS,
-    X86_SSE2			= (1 << 3),
-    X86_CMOV			= (1 << 4),
-    X86_SSSE3			= (1 << 5)
-} cpu_features_t;
-
-#ifdef HAVE_GETISAX
-
-#include <sys/auxv.h>
-
-static cpu_features_t
-detect_cpu_features (void)
-{
-    cpu_features_t features = 0;
-    unsigned int result = 0;
-
-    if (getisax (&result, 1))
-    {
-	if (result & AV_386_CMOV)
-	    features |= X86_CMOV;
-	if (result & AV_386_MMX)
-	    features |= X86_MMX;
-	if (result & AV_386_AMD_MMX)
-	    features |= X86_MMX_EXTENSIONS;
-	if (result & AV_386_SSE)
-	    features |= X86_SSE;
-	if (result & AV_386_SSE2)
-	    features |= X86_SSE2;
-	if (result & AV_386_SSSE3)
-	    features |= X86_SSSE3;
-    }
-
-    return features;
-}
-
-#else
-
-#define _PIXMAN_X86_64							\
-    (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
-
-static pixman_bool_t
-have_cpuid (void)
-{
-#if _PIXMAN_X86_64 || defined (_MSC_VER)
-
-    return TRUE;
-
-#elif defined (__GNUC__)
-    uint32_t result;
-
-    __asm__ volatile (
-        "pushf"				"\n\t"
-        "pop %%eax"			"\n\t"
-        "mov %%eax, %%ecx"		"\n\t"
-        "xor $0x00200000, %%eax"	"\n\t"
-        "push %%eax"			"\n\t"
-        "popf"				"\n\t"
-        "pushf"				"\n\t"
-        "pop %%eax"			"\n\t"
-        "xor %%ecx, %%eax"		"\n\t"
-	"mov %%eax, %0"			"\n\t"
-	: "=r" (result)
-	:
-	: "%eax", "%ecx");
-
-    return !!result;
-
-#else
-#error "Unknown compiler"
-#endif
-}
-
-static void
-pixman_cpuid (uint32_t feature,
-	      uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
-{
-#if defined (__GNUC__)
-
-#if _PIXMAN_X86_64
-    __asm__ volatile (
-        "cpuid"				"\n\t"
-	: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
-	: "a" (feature));
-#else
-    /* On x86-32 we need to be careful about the handling of %ebx
-     * and %esp. We can't declare either one as clobbered
-     * since they are special registers (%ebx is the "PIC
-     * register" holding an offset to global data, %esp the
-     * stack pointer), so we need to make sure that %ebx is
-     * preserved, and that %esp has its original value when
-     * accessing the output operands.
-     */
-    __asm__ volatile (
-	"xchg %%ebx, %1"		"\n\t"
-	"cpuid"				"\n\t"
-	"xchg %%ebx, %1"		"\n\t"
-	: "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d)
-	: "a" (feature));
-#endif
-
-#elif defined (_MSC_VER)
-    int info[4];
-
-    __cpuid (info, feature);
-
-    *a = info[0];
-    *b = info[1];
-    *c = info[2];
-    *d = info[3];
-#else
-#error Unknown compiler
-#endif
-}
-
-static cpu_features_t
-detect_cpu_features (void)
-{
-    uint32_t a, b, c, d;
-    cpu_features_t features = 0;
-
-    if (!have_cpuid())
-	return features;
-
-    /* Get feature bits */
-    pixman_cpuid (0x01, &a, &b, &c, &d);
-    if (d & (1 << 15))
-	features |= X86_CMOV;
-    if (d & (1 << 23))
-	features |= X86_MMX;
-    if (d & (1 << 25))
-	features |= X86_SSE;
-    if (d & (1 << 26))
-	features |= X86_SSE2;
-    if (c & (1 << 9))
-	features |= X86_SSSE3;
-
-    /* Check for AMD specific features */
-    if ((features & X86_MMX) && !(features & X86_SSE))
-    {
-	char vendor[13];
-
-	/* Get vendor string */
-	memset (vendor, 0, sizeof vendor);
-
-	pixman_cpuid (0x00, &a, &b, &c, &d);
-	memcpy (vendor + 0, &b, 4);
-	memcpy (vendor + 4, &d, 4);
-	memcpy (vendor + 8, &c, 4);
-
-	if (strcmp (vendor, "AuthenticAMD") == 0 ||
-	    strcmp (vendor, "HygonGenuine") == 0 ||
-	    strcmp (vendor, "Geode by NSC") == 0)
-	{
-	    pixman_cpuid (0x80000000, &a, &b, &c, &d);
-	    if (a >= 0x80000001)
-	    {
-		pixman_cpuid (0x80000001, &a, &b, &c, &d);
-
-		if (d & (1 << 22))
-		    features |= X86_MMX_EXTENSIONS;
-	    }
-	}
-    }
-
-    return features;
-}
-
-#endif
-
-static pixman_bool_t
-have_feature (cpu_features_t feature)
-{
-    static pixman_bool_t initialized;
-    static cpu_features_t features;
-
-    if (!initialized)
-    {
-	features = detect_cpu_features();
-	initialized = TRUE;
-    }
-
-    return (features & feature) == feature;
-}
-
-#endif
-
-pixman_implementation_t *
-_pixman_x86_get_implementations (pixman_implementation_t *imp)
-{
-#define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
-#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
-#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
-
-#ifdef USE_X86_MMX
-    if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
-	imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_SSE2
-    if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
-	imp = _pixman_implementation_create_sse2 (imp);
-#endif
-
-#ifdef USE_SSSE3
-    if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS))
-	imp = _pixman_implementation_create_ssse3 (imp);
-#endif
-
-    return imp;
-}
diff --git a/vendor/pixman/pixman/pixman.c b/vendor/pixman/pixman/pixman.c
deleted file mode 100644
index 82ec236a6..000000000
--- a/vendor/pixman/pixman/pixman.c
+++ /dev/null
@@ -1,1134 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author:  Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <pixman-config.h>
-#endif
-#include "pixman-private.h"
-
-#include <stdlib.h>
-
-pixman_implementation_t *global_implementation;
-
-#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
-static void __attribute__((constructor))
-pixman_constructor (void)
-{
-    global_implementation = _pixman_choose_implementation ();
-}
-#endif
-
-typedef struct operator_info_t operator_info_t;
-
-struct operator_info_t
-{
-    uint8_t	opaque_info[4];
-};
-
-#define PACK(neither, src, dest, both)			\
-    {{	    (uint8_t)PIXMAN_OP_ ## neither,		\
-	    (uint8_t)PIXMAN_OP_ ## src,			\
-	    (uint8_t)PIXMAN_OP_ ## dest,		\
-	    (uint8_t)PIXMAN_OP_ ## both		}}
-
-static const operator_info_t operator_table[] =
-{
-    /*    Neither Opaque         Src Opaque             Dst Opaque             Both Opaque */
-    PACK (CLEAR,                 CLEAR,                 CLEAR,                 CLEAR),
-    PACK (SRC,                   SRC,                   SRC,                   SRC),
-    PACK (DST,                   DST,                   DST,                   DST),
-    PACK (OVER,                  SRC,                   OVER,                  SRC),
-    PACK (OVER_REVERSE,          OVER_REVERSE,          DST,                   DST),
-    PACK (IN,                    IN,                    SRC,                   SRC),
-    PACK (IN_REVERSE,            DST,                   IN_REVERSE,            DST),
-    PACK (OUT,                   OUT,                   CLEAR,                 CLEAR),
-    PACK (OUT_REVERSE,           CLEAR,                 OUT_REVERSE,           CLEAR),
-    PACK (ATOP,                  IN,                    OVER,                  SRC),
-    PACK (ATOP_REVERSE,          OVER_REVERSE,          IN_REVERSE,            DST),
-    PACK (XOR,                   OUT,                   OUT_REVERSE,           CLEAR),
-    PACK (ADD,                   ADD,                   ADD,                   ADD),
-    PACK (SATURATE,              OVER_REVERSE,          DST,                   DST),
-
-    {{ 0 /* 0x0e */ }},
-    {{ 0 /* 0x0f */ }},
-
-    PACK (CLEAR,                 CLEAR,                 CLEAR,                 CLEAR),
-    PACK (SRC,                   SRC,                   SRC,                   SRC),
-    PACK (DST,                   DST,                   DST,                   DST),
-    PACK (DISJOINT_OVER,         DISJOINT_OVER,         DISJOINT_OVER,         DISJOINT_OVER),
-    PACK (DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE),
-    PACK (DISJOINT_IN,           DISJOINT_IN,           DISJOINT_IN,           DISJOINT_IN),
-    PACK (DISJOINT_IN_REVERSE,   DISJOINT_IN_REVERSE,   DISJOINT_IN_REVERSE,   DISJOINT_IN_REVERSE),
-    PACK (DISJOINT_OUT,          DISJOINT_OUT,          DISJOINT_OUT,          DISJOINT_OUT),
-    PACK (DISJOINT_OUT_REVERSE,  DISJOINT_OUT_REVERSE,  DISJOINT_OUT_REVERSE,  DISJOINT_OUT_REVERSE),
-    PACK (DISJOINT_ATOP,         DISJOINT_ATOP,         DISJOINT_ATOP,         DISJOINT_ATOP),
-    PACK (DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE),
-    PACK (DISJOINT_XOR,          DISJOINT_XOR,          DISJOINT_XOR,          DISJOINT_XOR),
-
-    {{ 0 /* 0x1c */ }},
-    {{ 0 /* 0x1d */ }},
-    {{ 0 /* 0x1e */ }},
-    {{ 0 /* 0x1f */ }},
-
-    PACK (CLEAR,                 CLEAR,                 CLEAR,                 CLEAR),
-    PACK (SRC,                   SRC,                   SRC,                   SRC),
-    PACK (DST,                   DST,                   DST,                   DST),
-    PACK (CONJOINT_OVER,         CONJOINT_OVER,         CONJOINT_OVER,         CONJOINT_OVER),
-    PACK (CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE),
-    PACK (CONJOINT_IN,           CONJOINT_IN,           CONJOINT_IN,           CONJOINT_IN),
-    PACK (CONJOINT_IN_REVERSE,   CONJOINT_IN_REVERSE,   CONJOINT_IN_REVERSE,   CONJOINT_IN_REVERSE),
-    PACK (CONJOINT_OUT,          CONJOINT_OUT,          CONJOINT_OUT,          CONJOINT_OUT),
-    PACK (CONJOINT_OUT_REVERSE,  CONJOINT_OUT_REVERSE,  CONJOINT_OUT_REVERSE,  CONJOINT_OUT_REVERSE),
-    PACK (CONJOINT_ATOP,         CONJOINT_ATOP,         CONJOINT_ATOP,         CONJOINT_ATOP),
-    PACK (CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE),
-    PACK (CONJOINT_XOR,          CONJOINT_XOR,          CONJOINT_XOR,          CONJOINT_XOR),
-
-    {{ 0 /* 0x2c */ }},
-    {{ 0 /* 0x2d */ }},
-    {{ 0 /* 0x2e */ }},
-    {{ 0 /* 0x2f */ }},
-
-    PACK (MULTIPLY,              MULTIPLY,              MULTIPLY,              MULTIPLY),
-    PACK (SCREEN,                SCREEN,                SCREEN,                SCREEN),
-    PACK (OVERLAY,               OVERLAY,               OVERLAY,               OVERLAY),
-    PACK (DARKEN,                DARKEN,                DARKEN,                DARKEN),
-    PACK (LIGHTEN,               LIGHTEN,               LIGHTEN,               LIGHTEN),
-    PACK (COLOR_DODGE,           COLOR_DODGE,           COLOR_DODGE,           COLOR_DODGE),
-    PACK (COLOR_BURN,            COLOR_BURN,            COLOR_BURN,            COLOR_BURN),
-    PACK (HARD_LIGHT,            HARD_LIGHT,            HARD_LIGHT,            HARD_LIGHT),
-    PACK (SOFT_LIGHT,            SOFT_LIGHT,            SOFT_LIGHT,            SOFT_LIGHT),
-    PACK (DIFFERENCE,            DIFFERENCE,            DIFFERENCE,            DIFFERENCE),
-    PACK (EXCLUSION,             EXCLUSION,             EXCLUSION,             EXCLUSION),
-    PACK (HSL_HUE,               HSL_HUE,               HSL_HUE,               HSL_HUE),
-    PACK (HSL_SATURATION,        HSL_SATURATION,        HSL_SATURATION,        HSL_SATURATION),
-    PACK (HSL_COLOR,             HSL_COLOR,             HSL_COLOR,             HSL_COLOR),
-    PACK (HSL_LUMINOSITY,        HSL_LUMINOSITY,        HSL_LUMINOSITY,        HSL_LUMINOSITY),
-};
-
-/*
- * Optimize the current operator based on opacity of source or destination
- * The output operator should be mathematically equivalent to the source.
- */
-static pixman_op_t
-optimize_operator (pixman_op_t     op,
-		   uint32_t        src_flags,
-		   uint32_t        mask_flags,
-		   uint32_t        dst_flags)
-{
-    pixman_bool_t is_source_opaque, is_dest_opaque;
-
-#define OPAQUE_SHIFT 13
-    
-    COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT));
-    
-    is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE);
-    is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE);
-
-    is_dest_opaque >>= OPAQUE_SHIFT - 1;
-    is_source_opaque >>= OPAQUE_SHIFT;
-
-    return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque];
-}
-
-/*
- * Computing composite region
- */
-static inline pixman_bool_t
-clip_general_image (pixman_region32_t * region,
-                    pixman_region32_t * clip,
-                    int                 dx,
-                    int                 dy)
-{
-    if (pixman_region32_n_rects (region) == 1 &&
-        pixman_region32_n_rects (clip) == 1)
-    {
-	pixman_box32_t *  rbox = pixman_region32_rectangles (region, NULL);
-	pixman_box32_t *  cbox = pixman_region32_rectangles (clip, NULL);
-	int v;
-
-	if (rbox->x1 < (v = cbox->x1 + dx))
-	    rbox->x1 = v;
-	if (rbox->x2 > (v = cbox->x2 + dx))
-	    rbox->x2 = v;
-	if (rbox->y1 < (v = cbox->y1 + dy))
-	    rbox->y1 = v;
-	if (rbox->y2 > (v = cbox->y2 + dy))
-	    rbox->y2 = v;
-	if (rbox->x1 >= rbox->x2 || rbox->y1 >= rbox->y2)
-	{
-	    pixman_region32_init (region);
-	    return FALSE;
-	}
-    }
-    else if (pixman_region32_empty (clip))
-    {
-	return FALSE;
-    }
-    else
-    {
-	if (dx || dy)
-	    pixman_region32_translate (region, -dx, -dy);
-
-	if (!pixman_region32_intersect (region, region, clip))
-	    return FALSE;
-
-	if (dx || dy)
-	    pixman_region32_translate (region, dx, dy);
-    }
-
-    return pixman_region32_not_empty (region);
-}
-
-static inline pixman_bool_t
-clip_source_image (pixman_region32_t * region,
-                   pixman_image_t *    image,
-                   int                 dx,
-                   int                 dy)
-{
-    /* Source clips are ignored, unless they are explicitly turned on
-     * and the clip in question was set by an X client. (Because if
-     * the clip was not set by a client, then it is a hierarchy
-     * clip and those should always be ignored for sources).
-     */
-    if (!image->common.clip_sources || !image->common.client_clip)
-	return TRUE;
-
-    return clip_general_image (region,
-                               &image->common.clip_region,
-                               dx, dy);
-}
-
-/*
- * returns FALSE if the final region is empty.  Indistinguishable from
- * an allocation failure, but rendering ignores those anyways.
- */
-pixman_bool_t
-_pixman_compute_composite_region32 (pixman_region32_t * region,
-				    pixman_image_t *    src_image,
-				    pixman_image_t *    mask_image,
-				    pixman_image_t *    dest_image,
-				    int32_t             src_x,
-				    int32_t             src_y,
-				    int32_t             mask_x,
-				    int32_t             mask_y,
-				    int32_t             dest_x,
-				    int32_t             dest_y,
-				    int32_t             width,
-				    int32_t             height)
-{
-    region->extents.x1 = dest_x;
-    region->extents.x2 = dest_x + width;
-    region->extents.y1 = dest_y;
-    region->extents.y2 = dest_y + height;
-
-    region->extents.x1 = MAX (region->extents.x1, 0);
-    region->extents.y1 = MAX (region->extents.y1, 0);
-    region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width);
-    region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height);
-
-    region->data = 0;
-
-    /* Check for empty operation */
-    if (region->extents.x1 >= region->extents.x2 ||
-        region->extents.y1 >= region->extents.y2)
-    {
-	region->extents.x1 = 0;
-	region->extents.x2 = 0;
-	region->extents.y1 = 0;
-	region->extents.y2 = 0;
-	return FALSE;
-    }
-
-    if (dest_image->common.have_clip_region)
-    {
-	if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0))
-	    return FALSE;
-    }
-
-    if (dest_image->common.alpha_map)
-    {
-	if (!pixman_region32_intersect_rect (region, region,
-					     dest_image->common.alpha_origin_x,
-					     dest_image->common.alpha_origin_y,
-					     dest_image->common.alpha_map->width,
-					     dest_image->common.alpha_map->height))
-	{
-	    return FALSE;
-	}
-	if (pixman_region32_empty (region))
-	    return FALSE;
-	if (dest_image->common.alpha_map->common.have_clip_region)
-	{
-	    if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region,
-				     -dest_image->common.alpha_origin_x,
-				     -dest_image->common.alpha_origin_y))
-	    {
-		return FALSE;
-	    }
-	}
-    }
-
-    /* clip against src */
-    if (src_image->common.have_clip_region)
-    {
-	if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y))
-	    return FALSE;
-    }
-    if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region)
-    {
-	if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map,
-	                        dest_x - (src_x - src_image->common.alpha_origin_x),
-	                        dest_y - (src_y - src_image->common.alpha_origin_y)))
-	{
-	    return FALSE;
-	}
-    }
-    /* clip against mask */
-    if (mask_image && mask_image->common.have_clip_region)
-    {
-	if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y))
-	    return FALSE;
-
-	if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region)
-	{
-	    if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map,
-	                            dest_x - (mask_x - mask_image->common.alpha_origin_x),
-	                            dest_y - (mask_y - mask_image->common.alpha_origin_y)))
-	    {
-		return FALSE;
-	    }
-	}
-    }
-
-    return TRUE;
-}
-
-typedef struct box_48_16 box_48_16_t;
-
-struct box_48_16
-{
-    pixman_fixed_48_16_t        x1;
-    pixman_fixed_48_16_t        y1;
-    pixman_fixed_48_16_t        x2;
-    pixman_fixed_48_16_t        y2;
-};
-
-static pixman_bool_t
-compute_transformed_extents (pixman_transform_t   *transform,
-			     const pixman_box32_t *extents,
-			     box_48_16_t          *transformed)
-{
-    pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
-    pixman_fixed_t x1, y1, x2, y2;
-    int i;
-
-    x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
-    y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
-    x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
-    y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
-
-    if (!transform)
-    {
-	transformed->x1 = x1;
-	transformed->y1 = y1;
-	transformed->x2 = x2;
-	transformed->y2 = y2;
-
-	return TRUE;
-    }
-
-    tx1 = ty1 = INT64_MAX;
-    tx2 = ty2 = INT64_MIN;
-
-    for (i = 0; i < 4; ++i)
-    {
-	pixman_fixed_48_16_t tx, ty;
-	pixman_vector_t v;
-
-	v.vector[0] = (i & 0x01)? x1 : x2;
-	v.vector[1] = (i & 0x02)? y1 : y2;
-	v.vector[2] = pixman_fixed_1;
-
-	if (!pixman_transform_point (transform, &v))
-	    return FALSE;
-
-	tx = (pixman_fixed_48_16_t)v.vector[0];
-	ty = (pixman_fixed_48_16_t)v.vector[1];
-
-	if (tx < tx1)
-	    tx1 = tx;
-	if (ty < ty1)
-	    ty1 = ty;
-	if (tx > tx2)
-	    tx2 = tx;
-	if (ty > ty2)
-	    ty2 = ty;
-    }
-
-    transformed->x1 = tx1;
-    transformed->y1 = ty1;
-    transformed->x2 = tx2;
-    transformed->y2 = ty2;
-
-    return TRUE;
-}
-
-#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
-#define ABS(f)      (((f) < 0)?  (-(f)) : (f))
-#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16)))
-
-static pixman_bool_t
-analyze_extent (pixman_image_t       *image,
-		const pixman_box32_t *extents,
-		uint32_t             *flags)
-{
-    pixman_transform_t *transform;
-    pixman_fixed_t x_off, y_off;
-    pixman_fixed_t width, height;
-    pixman_fixed_t *params;
-    box_48_16_t transformed;
-    pixman_box32_t exp_extents;
-
-    if (!image)
-	return TRUE;
-
-    /* Some compositing functions walk one step
-     * outside the destination rectangle, so we
-     * check here that the expanded-by-one source
-     * extents in destination space fits in 16 bits
-     */
-    if (!IS_16BIT (extents->x1 - 1)		||
-	!IS_16BIT (extents->y1 - 1)		||
-	!IS_16BIT (extents->x2 + 1)		||
-	!IS_16BIT (extents->y2 + 1))
-    {
-	return FALSE;
-    }
-
-    transform = image->common.transform;
-    if (image->common.type == BITS)
-    {
-	/* During repeat mode calculations we might convert the
-	 * width/height of an image to fixed 16.16, so we need
-	 * them to be smaller than 16 bits.
-	 */
-	if (image->bits.width >= 0x7fff	|| image->bits.height >= 0x7fff)
-	    return FALSE;
-
-	if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
-	    extents->x1 >= 0 &&
-	    extents->y1 >= 0 &&
-	    extents->x2 <= image->bits.width &&
-	    extents->y2 <= image->bits.height)
-	{
-	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-	    return TRUE;
-	}
-
-	switch (image->common.filter)
-	{
-	case PIXMAN_FILTER_CONVOLUTION:
-	    params = image->common.filter_params;
-	    x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1);
-	    y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1);
-	    width = params[0];
-	    height = params[1];
-	    break;
-
-	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
-	    params = image->common.filter_params;
-	    x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1);
-	    y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1);
-	    width = params[0];
-	    height = params[1];
-	    break;
-	    
-	case PIXMAN_FILTER_GOOD:
-	case PIXMAN_FILTER_BEST:
-	case PIXMAN_FILTER_BILINEAR:
-	    x_off = - pixman_fixed_1 / 2;
-	    y_off = - pixman_fixed_1 / 2;
-	    width = pixman_fixed_1;
-	    height = pixman_fixed_1;
-	    break;
-
-	case PIXMAN_FILTER_FAST:
-	case PIXMAN_FILTER_NEAREST:
-	    x_off = - pixman_fixed_e;
-	    y_off = - pixman_fixed_e;
-	    width = 0;
-	    height = 0;
-	    break;
-
-	default:
-	    return FALSE;
-	}
-    }
-    else
-    {
-	x_off = 0;
-	y_off = 0;
-	width = 0;
-	height = 0;
-    }
-
-    if (!compute_transformed_extents (transform, extents, &transformed))
-	return FALSE;
-
-    if (image->common.type == BITS)
-    {
-	if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0                &&
-	    pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0                &&
-	    pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width &&
-	    pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height)
-	{
-	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
-	}
-
-	if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0		  &&
-	    pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0		  &&
-	    pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
-	    pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
-	{
-	    *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
-	}
-    }
-
-    /* Check we don't overflow when the destination extents are expanded by one.
-     * This ensures that compositing functions can simply walk the source space
-     * using 16.16 variables without worrying about overflow.
-     */
-    exp_extents = *extents;
-    exp_extents.x1 -= 1;
-    exp_extents.y1 -= 1;
-    exp_extents.x2 += 1;
-    exp_extents.y2 += 1;
-
-    if (!compute_transformed_extents (transform, &exp_extents, &transformed))
-	return FALSE;
-    
-    if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e)	||
-	!IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e)	||
-	!IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width)	||
-	!IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height))
-    {
-	return FALSE;
-    }
-
-    return TRUE;
-}
-
-/*
- * Work around GCC bug causing crashes in Mozilla with SSE2
- *
- * When using -msse, gcc generates movdqa instructions assuming that
- * the stack is 16 byte aligned. Unfortunately some applications, such
- * as Mozilla and Mono, end up aligning the stack to 4 bytes, which
- * causes the movdqa instructions to fail.
- *
- * The __force_align_arg_pointer__ makes gcc generate a prologue that
- * realigns the stack pointer to 16 bytes.
- *
- * On x86-64 this is not necessary because the standard ABI already
- * calls for a 16 byte aligned stack.
- *
- * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
- */
-#if defined (USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-PIXMAN_EXPORT void
-pixman_image_composite32 (pixman_op_t      op,
-                          pixman_image_t * src,
-                          pixman_image_t * mask,
-                          pixman_image_t * dest,
-                          int32_t          src_x,
-                          int32_t          src_y,
-                          int32_t          mask_x,
-                          int32_t          mask_y,
-                          int32_t          dest_x,
-                          int32_t          dest_y,
-                          int32_t          width,
-                          int32_t          height)
-{
-    pixman_format_code_t src_format, mask_format, dest_format;
-    pixman_region32_t region;
-    pixman_box32_t extents;
-    pixman_implementation_t *imp;
-    pixman_composite_func_t func;
-    pixman_composite_info_t info;
-    const pixman_box32_t *pbox;
-    int n;
-
-    _pixman_image_validate (src);
-    if (mask)
-	_pixman_image_validate (mask);
-    _pixman_image_validate (dest);
-
-    src_format = src->common.extended_format_code;
-    info.src_flags = src->common.flags;
-
-    if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
-    {
-	mask_format = mask->common.extended_format_code;
-	info.mask_flags = mask->common.flags;
-    }
-    else
-    {
-	mask_format = PIXMAN_null;
-	info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP;
-    }
-
-    dest_format = dest->common.extended_format_code;
-    info.dest_flags = dest->common.flags;
-
-    /* Check for pixbufs */
-    if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
-	(src->type == BITS && src->bits.bits == mask->bits.bits)	   &&
-	(src->common.repeat == mask->common.repeat)			   &&
-	(info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM)	   &&
-	(src_x == mask_x && src_y == mask_y))
-    {
-	if (src_format == PIXMAN_x8b8g8r8)
-	    src_format = mask_format = PIXMAN_pixbuf;
-	else if (src_format == PIXMAN_x8r8g8b8)
-	    src_format = mask_format = PIXMAN_rpixbuf;
-    }
-
-    pixman_region32_init (&region);
-
-    if (!_pixman_compute_composite_region32 (
-	    &region, src, mask, dest,
-	    src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
-    {
-	goto out;
-    }
-
-    extents = *pixman_region32_extents (&region);
-
-    extents.x1 -= dest_x - src_x;
-    extents.y1 -= dest_y - src_y;
-    extents.x2 -= dest_x - src_x;
-    extents.y2 -= dest_y - src_y;
-
-    if (!analyze_extent (src, &extents, &info.src_flags))
-	goto out;
-
-    extents.x1 -= src_x - mask_x;
-    extents.y1 -= src_y - mask_y;
-    extents.x2 -= src_x - mask_x;
-    extents.y2 -= src_y - mask_y;
-
-    if (!analyze_extent (mask, &extents, &info.mask_flags))
-	goto out;
-
-    /* If the clip is within the source samples, and the samples are
-     * opaque, then the source is effectively opaque.
-     */
-#define NEAREST_OPAQUE	(FAST_PATH_SAMPLES_OPAQUE |			\
-			 FAST_PATH_NEAREST_FILTER |			\
-			 FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-#define BILINEAR_OPAQUE	(FAST_PATH_SAMPLES_OPAQUE |			\
-			 FAST_PATH_BILINEAR_FILTER |			\
-			 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
-
-    if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
-	(info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
-    {
-	info.src_flags |= FAST_PATH_IS_OPAQUE;
-    }
-
-    if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
-	(info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
-    {
-	info.mask_flags |= FAST_PATH_IS_OPAQUE;
-    }
-
-    /*
-     * Check if we can replace our operator by a simpler one
-     * if the src or dest are opaque. The output operator should be
-     * mathematically equivalent to the source.
-     */
-    info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
-
-    _pixman_implementation_lookup_composite (
-	get_implementation (), info.op,
-	src_format, info.src_flags,
-	mask_format, info.mask_flags,
-	dest_format, info.dest_flags,
-	&imp, &func);
-
-    info.src_image = src;
-    info.mask_image = mask;
-    info.dest_image = dest;
-
-    pbox = pixman_region32_rectangles (&region, &n);
-
-    while (n--)
-    {
-	info.src_x = pbox->x1 + src_x - dest_x;
-	info.src_y = pbox->y1 + src_y - dest_y;
-	info.mask_x = pbox->x1 + mask_x - dest_x;
-	info.mask_y = pbox->y1 + mask_y - dest_y;
-	info.dest_x = pbox->x1;
-	info.dest_y = pbox->y1;
-	info.width = pbox->x2 - pbox->x1;
-	info.height = pbox->y2 - pbox->y1;
-
-	func (imp, &info);
-
-	pbox++;
-    }
-
-out:
-    pixman_region32_fini (&region);
-}
-
-PIXMAN_EXPORT void
-pixman_image_composite (pixman_op_t      op,
-                        pixman_image_t * src,
-                        pixman_image_t * mask,
-                        pixman_image_t * dest,
-                        int16_t          src_x,
-                        int16_t          src_y,
-                        int16_t          mask_x,
-                        int16_t          mask_y,
-                        int16_t          dest_x,
-                        int16_t          dest_y,
-                        uint16_t         width,
-                        uint16_t         height)
-{
-    pixman_image_composite32 (op, src, mask, dest, src_x, src_y, 
-                              mask_x, mask_y, dest_x, dest_y, width, height);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_blt (uint32_t *src_bits,
-            uint32_t *dst_bits,
-            int       src_stride,
-            int       dst_stride,
-            int       src_bpp,
-            int       dst_bpp,
-            int       src_x,
-            int       src_y,
-            int       dest_x,
-            int       dest_y,
-            int       width,
-            int       height)
-{
-    return _pixman_implementation_blt (get_implementation(),
-				       src_bits, dst_bits, src_stride, dst_stride,
-                                       src_bpp, dst_bpp,
-                                       src_x, src_y,
-                                       dest_x, dest_y,
-                                       width, height);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_fill (uint32_t *bits,
-             int       stride,
-             int       bpp,
-             int       x,
-             int       y,
-             int       width,
-             int       height,
-             uint32_t  filler)
-{
-    return _pixman_implementation_fill (
-	get_implementation(), bits, stride, bpp, x, y, width, height, filler);
-}
-
-static uint32_t
-color_to_uint32 (const pixman_color_t *color)
-{
-    return
-        (color->alpha >> 8 << 24) |
-        (color->red >> 8 << 16) |
-        (color->green & 0xff00) |
-        (color->blue >> 8);
-}
-
-static pixman_bool_t
-color_to_pixel (const pixman_color_t *color,
-                uint32_t *            pixel,
-                pixman_format_code_t  format)
-{
-    uint32_t c = color_to_uint32 (color);
-
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA_FLOAT)
-    {
-	return FALSE;
-    }
-
-    if (!(format == PIXMAN_a8r8g8b8     ||
-          format == PIXMAN_x8r8g8b8     ||
-          format == PIXMAN_a8b8g8r8     ||
-          format == PIXMAN_x8b8g8r8     ||
-          format == PIXMAN_b8g8r8a8     ||
-          format == PIXMAN_b8g8r8x8     ||
-          format == PIXMAN_r8g8b8a8     ||
-          format == PIXMAN_r8g8b8x8     ||
-          format == PIXMAN_r5g6b5       ||
-          format == PIXMAN_b5g6r5       ||
-          format == PIXMAN_a8           ||
-          format == PIXMAN_a1))
-    {
-	return FALSE;
-    }
-
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR)
-    {
-	c = ((c & 0xff000000) >>  0) |
-	    ((c & 0x00ff0000) >> 16) |
-	    ((c & 0x0000ff00) >>  0) |
-	    ((c & 0x000000ff) << 16);
-    }
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA)
-    {
-	c = ((c & 0xff000000) >> 24) |
-	    ((c & 0x00ff0000) >>  8) |
-	    ((c & 0x0000ff00) <<  8) |
-	    ((c & 0x000000ff) << 24);
-    }
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
-	c = ((c & 0xff000000) >> 24) | (c << 8);
-
-    if (format == PIXMAN_a1)
-	c = c >> 31;
-    else if (format == PIXMAN_a8)
-	c = c >> 24;
-    else if (format == PIXMAN_r5g6b5 ||
-             format == PIXMAN_b5g6r5)
-	c = convert_8888_to_0565 (c);
-
-#if 0
-    printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue);
-    printf ("pixel: %x\n", c);
-#endif
-
-    *pixel = c;
-    return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_fill_rectangles (pixman_op_t                 op,
-                              pixman_image_t *            dest,
-			      const pixman_color_t *      color,
-                              int                         n_rects,
-                              const pixman_rectangle16_t *rects)
-{
-    pixman_box32_t stack_boxes[6];
-    pixman_box32_t *boxes;
-    pixman_bool_t result;
-    int i;
-
-    if (n_rects > 6)
-    {
-        boxes = pixman_malloc_ab (sizeof (pixman_box32_t), n_rects);
-        if (boxes == NULL)
-            return FALSE;
-    }
-    else
-    {
-        boxes = stack_boxes;
-    }
-
-    for (i = 0; i < n_rects; ++i)
-    {
-        boxes[i].x1 = rects[i].x;
-        boxes[i].y1 = rects[i].y;
-        boxes[i].x2 = boxes[i].x1 + rects[i].width;
-        boxes[i].y2 = boxes[i].y1 + rects[i].height;
-    }
-
-    result = pixman_image_fill_boxes (op, dest, color, n_rects, boxes);
-
-    if (boxes != stack_boxes)
-        free (boxes);
-    
-    return result;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_fill_boxes (pixman_op_t           op,
-                         pixman_image_t *      dest,
-                         const pixman_color_t *color,
-                         int                   n_boxes,
-                         const pixman_box32_t *boxes)
-{
-    pixman_image_t *solid;
-    pixman_color_t c;
-    int i;
-
-    _pixman_image_validate (dest);
-    
-    if (color->alpha == 0xffff)
-    {
-        if (op == PIXMAN_OP_OVER)
-            op = PIXMAN_OP_SRC;
-    }
-
-    if (op == PIXMAN_OP_CLEAR)
-    {
-        c.red = 0;
-        c.green = 0;
-        c.blue = 0;
-        c.alpha = 0;
-
-        color = &c;
-
-        op = PIXMAN_OP_SRC;
-    }
-
-    if (op == PIXMAN_OP_SRC)
-    {
-        uint32_t pixel;
-
-        if (color_to_pixel (color, &pixel, dest->bits.format))
-        {
-            pixman_region32_t fill_region;
-            int n_rects, j;
-            pixman_box32_t *rects;
-
-            if (!pixman_region32_init_rects (&fill_region, boxes, n_boxes))
-                return FALSE;
-
-            if (dest->common.have_clip_region)
-            {
-                if (!pixman_region32_intersect (&fill_region,
-                                                &fill_region,
-                                                &dest->common.clip_region))
-                    return FALSE;
-            }
-
-            rects = pixman_region32_rectangles (&fill_region, &n_rects);
-            for (j = 0; j < n_rects; ++j)
-            {
-                const pixman_box32_t *rect = &(rects[j]);
-                pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
-                             rect->x1, rect->y1, rect->x2 - rect->x1, rect->y2 - rect->y1,
-                             pixel);
-            }
-
-            pixman_region32_fini (&fill_region);
-            return TRUE;
-        }
-    }
-
-    solid = pixman_image_create_solid_fill (color);
-    if (!solid)
-        return FALSE;
-
-    for (i = 0; i < n_boxes; ++i)
-    {
-        const pixman_box32_t *box = &(boxes[i]);
-
-        pixman_image_composite32 (op, solid, NULL, dest,
-                                  0, 0, 0, 0,
-                                  box->x1, box->y1,
-                                  box->x2 - box->x1, box->y2 - box->y1);
-    }
-
-    pixman_image_unref (solid);
-
-    return TRUE;
-}
-
-/**
- * pixman_version:
- *
- * Returns the version of the pixman library encoded in a single
- * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that
- * later versions compare greater than earlier versions.
- *
- * A run-time comparison to check that pixman's version is greater than
- * or equal to version X.Y.Z could be performed as follows:
- *
- * <informalexample><programlisting>
- * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...}
- * </programlisting></informalexample>
- *
- * See also pixman_version_string() as well as the compile-time
- * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING.
- *
- * Return value: the encoded version.
- **/
-PIXMAN_EXPORT int
-pixman_version (void)
-{
-    return PIXMAN_VERSION;
-}
-
-/**
- * pixman_version_string:
- *
- * Returns the version of the pixman library as a human-readable string
- * of the form "X.Y.Z".
- *
- * See also pixman_version() as well as the compile-time equivalents
- * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION.
- *
- * Return value: a string containing the version.
- **/
-PIXMAN_EXPORT const char*
-pixman_version_string (void)
-{
-    return PIXMAN_VERSION_STRING;
-}
-
-/**
- * pixman_format_supported_source:
- * @format: A pixman_format_code_t format
- *
- * Return value: whether the provided format code is a supported
- * format for a pixman surface used as a source in
- * rendering.
- *
- * Currently, all pixman_format_code_t values are supported.
- **/
-PIXMAN_EXPORT pixman_bool_t
-pixman_format_supported_source (pixman_format_code_t format)
-{
-    switch (format)
-    {
-    /* 32 bpp formats */
-    case PIXMAN_a2b10g10r10:
-    case PIXMAN_x2b10g10r10:
-    case PIXMAN_a2r10g10b10:
-    case PIXMAN_x2r10g10b10:
-    case PIXMAN_a8r8g8b8:
-    case PIXMAN_a8r8g8b8_sRGB:
-    case PIXMAN_r8g8b8_sRGB:
-    case PIXMAN_x8r8g8b8:
-    case PIXMAN_a8b8g8r8:
-    case PIXMAN_x8b8g8r8:
-    case PIXMAN_b8g8r8a8:
-    case PIXMAN_b8g8r8x8:
-    case PIXMAN_r8g8b8a8:
-    case PIXMAN_r8g8b8x8:
-    case PIXMAN_r8g8b8:
-    case PIXMAN_b8g8r8:
-    case PIXMAN_r5g6b5:
-    case PIXMAN_b5g6r5:
-    case PIXMAN_x14r6g6b6:
-    /* 16 bpp formats */
-    case PIXMAN_a1r5g5b5:
-    case PIXMAN_x1r5g5b5:
-    case PIXMAN_a1b5g5r5:
-    case PIXMAN_x1b5g5r5:
-    case PIXMAN_a4r4g4b4:
-    case PIXMAN_x4r4g4b4:
-    case PIXMAN_a4b4g4r4:
-    case PIXMAN_x4b4g4r4:
-    /* 8bpp formats */
-    case PIXMAN_a8:
-    case PIXMAN_r3g3b2:
-    case PIXMAN_b2g3r3:
-    case PIXMAN_a2r2g2b2:
-    case PIXMAN_a2b2g2r2:
-    case PIXMAN_c8:
-    case PIXMAN_g8:
-    case PIXMAN_x4a4:
-    /* Collides with PIXMAN_c8
-       case PIXMAN_x4c4:
-     */
-    /* Collides with PIXMAN_g8
-       case PIXMAN_x4g4:
-     */
-    /* 4bpp formats */
-    case PIXMAN_a4:
-    case PIXMAN_r1g2b1:
-    case PIXMAN_b1g2r1:
-    case PIXMAN_a1r1g1b1:
-    case PIXMAN_a1b1g1r1:
-    case PIXMAN_c4:
-    case PIXMAN_g4:
-    /* 1bpp formats */
-    case PIXMAN_a1:
-    case PIXMAN_g1:
-    /* YUV formats */
-    case PIXMAN_yuy2:
-    case PIXMAN_yv12:
-	return TRUE;
-
-    default:
-	return FALSE;
-    }
-}
-
-/**
- * pixman_format_supported_destination:
- * @format: A pixman_format_code_t format
- *
- * Return value: whether the provided format code is a supported
- * format for a pixman surface used as a destination in
- * rendering.
- *
- * Currently, all pixman_format_code_t values are supported
- * except for the YUV formats.
- **/
-PIXMAN_EXPORT pixman_bool_t
-pixman_format_supported_destination (pixman_format_code_t format)
-{
-    /* YUV formats cannot be written to at the moment */
-    if (format == PIXMAN_yuy2 || format == PIXMAN_yv12)
-	return FALSE;
-
-    return pixman_format_supported_source (format);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_compute_composite_region (pixman_region16_t * region,
-                                 pixman_image_t *    src_image,
-                                 pixman_image_t *    mask_image,
-                                 pixman_image_t *    dest_image,
-                                 int16_t             src_x,
-                                 int16_t             src_y,
-                                 int16_t             mask_x,
-                                 int16_t             mask_y,
-                                 int16_t             dest_x,
-                                 int16_t             dest_y,
-                                 uint16_t            width,
-                                 uint16_t            height)
-{
-    pixman_region32_t r32;
-    pixman_bool_t retval;
-
-    pixman_region32_init (&r32);
-
-    retval = _pixman_compute_composite_region32 (
-	&r32, src_image, mask_image, dest_image,
-	src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-	width, height);
-
-    if (retval)
-    {
-	if (!pixman_region16_copy_from_region32 (region, &r32))
-	    retval = FALSE;
-    }
-
-    pixman_region32_fini (&r32);
-    return retval;
-}
diff --git a/vendor/pixman/pixman/pixman.h b/vendor/pixman/pixman/pixman.h
deleted file mode 100644
index d697b5357..000000000
--- a/vendor/pixman/pixman/pixman.h
+++ /dev/null
@@ -1,1426 +0,0 @@
-/***********************************************************
-
-Copyright 1987, 1998  The Open Group
-
-Permission to use, copy, modify, distribute, and sell this software and its
-documentation for any purpose is hereby granted without fee, provided that
-the above copyright notice appear in all copies and that both that
-copyright notice and this permission notice appear in supporting
-documentation.
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-Except as contained in this notice, the name of The Open Group shall not be
-used in advertising or otherwise to promote the sale, use or other dealings
-in this Software without prior written authorization from The Open Group.
-
-Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts.
-
-                        All Rights Reserved
-
-Permission to use, copy, modify, and distribute this software and its
-documentation for any purpose and without fee is hereby granted,
-provided that the above copyright notice appear in all copies and that
-both that copyright notice and this permission notice appear in
-supporting documentation, and that the name of Digital not be
-used in advertising or publicity pertaining to distribution of the
-software without specific, written prior permission.
-
-DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
-DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
-ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-SOFTWARE.
-
-******************************************************************/
-/*
- * Copyright © 1998, 2004 Keith Packard
- * Copyright   2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifndef PIXMAN_H__
-#define PIXMAN_H__
-
-#include <pixman-version.h>
-
-#ifdef  __cplusplus
-#define PIXMAN_BEGIN_DECLS extern "C" {
-#define PIXMAN_END_DECLS }
-#else
-#define PIXMAN_BEGIN_DECLS
-#define PIXMAN_END_DECLS
-#endif
-
-PIXMAN_BEGIN_DECLS
-
-/*
- * Standard integers
- */
-
-#if !defined (PIXMAN_DONT_DEFINE_STDINT)
-
-#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc)
-#  include <inttypes.h>
-/* VS 2010 (_MSC_VER 1600) has stdint.h */
-#elif defined (_MSC_VER) && _MSC_VER < 1600
-typedef __int8 int8_t;
-typedef unsigned __int8 uint8_t;
-typedef __int16 int16_t;
-typedef unsigned __int16 uint16_t;
-typedef __int32 int32_t;
-typedef unsigned __int32 uint32_t;
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-#elif defined (_AIX)
-#  include <sys/inttypes.h>
-#else
-#  include <stdint.h>
-#endif
-
-#endif
-
-/*
- * Boolean
- */
-typedef int pixman_bool_t;
-
-/*
- * Fixpoint numbers
- */
-typedef int64_t			pixman_fixed_32_32_t;
-typedef pixman_fixed_32_32_t	pixman_fixed_48_16_t;
-typedef uint32_t		pixman_fixed_1_31_t;
-typedef uint32_t		pixman_fixed_1_16_t;
-typedef int32_t			pixman_fixed_16_16_t;
-typedef pixman_fixed_16_16_t	pixman_fixed_t;
-
-#define pixman_fixed_e			((pixman_fixed_t) 1)
-#define pixman_fixed_1			(pixman_int_to_fixed(1))
-#define pixman_fixed_1_minus_e		(pixman_fixed_1 - pixman_fixed_e)
-#define pixman_fixed_minus_1		(pixman_int_to_fixed(-1))
-#define pixman_fixed_to_int(f)		((int) ((f) >> 16))
-#define pixman_int_to_fixed(i)		((pixman_fixed_t) ((uint32_t) (i) << 16))
-#define pixman_fixed_to_double(f)	(double) ((f) / (double) pixman_fixed_1)
-#define pixman_double_to_fixed(d)	((pixman_fixed_t) ((d) * 65536.0))
-#define pixman_fixed_frac(f)		((f) & pixman_fixed_1_minus_e)
-#define pixman_fixed_floor(f)		((f) & ~pixman_fixed_1_minus_e)
-#define pixman_fixed_ceil(f)		pixman_fixed_floor ((f) + pixman_fixed_1_minus_e)
-#define pixman_fixed_fraction(f)	((f) & pixman_fixed_1_minus_e)
-#define pixman_fixed_mod_2(f)		((f) & (pixman_fixed1 | pixman_fixed_1_minus_e))
-#define pixman_max_fixed_48_16		((pixman_fixed_48_16_t) 0x7fffffff)
-#define pixman_min_fixed_48_16		(-((pixman_fixed_48_16_t) 1 << 31))
-
-/*
- * Misc structs
- */
-typedef struct pixman_color pixman_color_t;
-typedef struct pixman_point_fixed pixman_point_fixed_t;
-typedef struct pixman_line_fixed pixman_line_fixed_t;
-typedef struct pixman_vector pixman_vector_t;
-typedef struct pixman_transform pixman_transform_t;
-
-struct pixman_color
-{
-    uint16_t	red;
-    uint16_t    green;
-    uint16_t    blue;
-    uint16_t    alpha;
-};
-
-struct pixman_point_fixed
-{
-    pixman_fixed_t	x;
-    pixman_fixed_t	y;
-};
-
-struct pixman_line_fixed
-{
-    pixman_point_fixed_t	p1, p2;
-};
-
-/*
- * Fixed point matrices
- */
-
-struct pixman_vector
-{
-    pixman_fixed_t	vector[3];
-};
-
-struct pixman_transform
-{
-    pixman_fixed_t	matrix[3][3];
-};
-
-/* forward declaration (sorry) */
-struct pixman_box16;
-typedef  union pixman_image		pixman_image_t;
-
-PIXMAN_API
-void          pixman_transform_init_identity    (struct pixman_transform       *matrix);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_point_3d         (const struct pixman_transform *transform,
-						 struct pixman_vector          *vector);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_point            (const struct pixman_transform *transform,
-						 struct pixman_vector          *vector);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_multiply         (struct pixman_transform       *dst,
-						 const struct pixman_transform *l,
-						 const struct pixman_transform *r);
-
-PIXMAN_API
-void          pixman_transform_init_scale       (struct pixman_transform       *t,
-						 pixman_fixed_t                 sx,
-						 pixman_fixed_t                 sy);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_scale            (struct pixman_transform       *forward,
-						 struct pixman_transform       *reverse,
-						 pixman_fixed_t                 sx,
-						 pixman_fixed_t                 sy);
-
-PIXMAN_API
-void          pixman_transform_init_rotate      (struct pixman_transform       *t,
-						 pixman_fixed_t                 cos,
-						 pixman_fixed_t                 sin);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_rotate           (struct pixman_transform       *forward,
-						 struct pixman_transform       *reverse,
-						 pixman_fixed_t                 c,
-						 pixman_fixed_t                 s);
-
-PIXMAN_API
-void          pixman_transform_init_translate   (struct pixman_transform       *t,
-						 pixman_fixed_t                 tx,
-						 pixman_fixed_t                 ty);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_translate        (struct pixman_transform       *forward,
-						 struct pixman_transform       *reverse,
-						 pixman_fixed_t                 tx,
-						 pixman_fixed_t                 ty);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_bounds           (const struct pixman_transform *matrix,
-						 struct pixman_box16           *b);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_invert           (struct pixman_transform       *dst,
-						 const struct pixman_transform *src);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_is_identity      (const struct pixman_transform *t);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_is_scale         (const struct pixman_transform *t);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t);
-
-PIXMAN_API
-pixman_bool_t pixman_transform_is_inverse       (const struct pixman_transform *a,
-						 const struct pixman_transform *b);
-
-/*
- * Floating point matrices
- */
-typedef struct pixman_f_transform pixman_f_transform_t;
-typedef struct pixman_f_vector pixman_f_vector_t;
-
-struct pixman_f_vector
-{
-    double  v[3];
-};
-
-struct pixman_f_transform
-{
-    double  m[3][3];
-};
-
-
-PIXMAN_API
-pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform         *t,
-							const struct pixman_f_transform *ft);
-
-PIXMAN_API
-void          pixman_f_transform_from_pixman_transform (struct pixman_f_transform       *ft,
-							const struct pixman_transform   *t);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_invert                (struct pixman_f_transform       *dst,
-							const struct pixman_f_transform *src);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_point                 (const struct pixman_f_transform *t,
-							struct pixman_f_vector          *v);
-
-PIXMAN_API
-void          pixman_f_transform_point_3d              (const struct pixman_f_transform *t,
-							struct pixman_f_vector          *v);
-
-PIXMAN_API
-void          pixman_f_transform_multiply              (struct pixman_f_transform       *dst,
-							const struct pixman_f_transform *l,
-							const struct pixman_f_transform *r);
-
-PIXMAN_API
-void          pixman_f_transform_init_scale            (struct pixman_f_transform       *t,
-							double                           sx,
-							double                           sy);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_scale                 (struct pixman_f_transform       *forward,
-							struct pixman_f_transform       *reverse,
-							double                           sx,
-							double                           sy);
-
-PIXMAN_API
-void          pixman_f_transform_init_rotate           (struct pixman_f_transform       *t,
-							double                           cos,
-							double                           sin);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_rotate                (struct pixman_f_transform       *forward,
-							struct pixman_f_transform       *reverse,
-							double                           c,
-							double                           s);
-
-PIXMAN_API
-void          pixman_f_transform_init_translate        (struct pixman_f_transform       *t,
-							double                           tx,
-							double                           ty);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_translate             (struct pixman_f_transform       *forward,
-							struct pixman_f_transform       *reverse,
-							double                           tx,
-							double                           ty);
-
-PIXMAN_API
-pixman_bool_t pixman_f_transform_bounds                (const struct pixman_f_transform *t,
-							struct pixman_box16             *b);
-
-PIXMAN_API
-void          pixman_f_transform_init_identity         (struct pixman_f_transform       *t);
-
-typedef enum
-{
-    PIXMAN_REPEAT_NONE,
-    PIXMAN_REPEAT_NORMAL,
-    PIXMAN_REPEAT_PAD,
-    PIXMAN_REPEAT_REFLECT
-} pixman_repeat_t;
-
-typedef enum
-{
-    PIXMAN_DITHER_NONE,
-    PIXMAN_DITHER_FAST,
-    PIXMAN_DITHER_GOOD,
-    PIXMAN_DITHER_BEST,
-    PIXMAN_DITHER_ORDERED_BAYER_8,
-    PIXMAN_DITHER_ORDERED_BLUE_NOISE_64,
-} pixman_dither_t;
-
-typedef enum
-{
-    PIXMAN_FILTER_FAST,
-    PIXMAN_FILTER_GOOD,
-    PIXMAN_FILTER_BEST,
-    PIXMAN_FILTER_NEAREST,
-    PIXMAN_FILTER_BILINEAR,
-    PIXMAN_FILTER_CONVOLUTION,
-
-    /* The SEPARABLE_CONVOLUTION filter takes the following parameters:
-     *
-     *         width:           integer given as 16.16 fixpoint number
-     *         height:          integer given as 16.16 fixpoint number
-     *         x_phase_bits:	integer given as 16.16 fixpoint
-     *         y_phase_bits:	integer given as 16.16 fixpoint
-     *         xtables:         (1 << x_phase_bits) tables of size width
-     *         ytables:         (1 << y_phase_bits) tables of size height
-     *
-     * When sampling at (x, y), the location is first rounded to one of
-     * n_x_phases * n_y_phases subpixel positions. These subpixel positions
-     * determine an xtable and a ytable to use.
-     *
-     * Conceptually a width x height matrix is then formed in which each entry
-     * is the product of the corresponding entries in the x and y tables.
-     * This matrix is then aligned with the image pixels such that its center
-     * is as close as possible to the subpixel location chosen earlier. Then
-     * the image is convolved with the matrix and the resulting pixel returned.
-     */
-    PIXMAN_FILTER_SEPARABLE_CONVOLUTION
-} pixman_filter_t;
-
-typedef enum
-{
-    PIXMAN_OP_CLEAR			= 0x00,
-    PIXMAN_OP_SRC			= 0x01,
-    PIXMAN_OP_DST			= 0x02,
-    PIXMAN_OP_OVER			= 0x03,
-    PIXMAN_OP_OVER_REVERSE		= 0x04,
-    PIXMAN_OP_IN			= 0x05,
-    PIXMAN_OP_IN_REVERSE		= 0x06,
-    PIXMAN_OP_OUT			= 0x07,
-    PIXMAN_OP_OUT_REVERSE		= 0x08,
-    PIXMAN_OP_ATOP			= 0x09,
-    PIXMAN_OP_ATOP_REVERSE		= 0x0a,
-    PIXMAN_OP_XOR			= 0x0b,
-    PIXMAN_OP_ADD			= 0x0c,
-    PIXMAN_OP_SATURATE			= 0x0d,
-
-    PIXMAN_OP_DISJOINT_CLEAR		= 0x10,
-    PIXMAN_OP_DISJOINT_SRC		= 0x11,
-    PIXMAN_OP_DISJOINT_DST		= 0x12,
-    PIXMAN_OP_DISJOINT_OVER		= 0x13,
-    PIXMAN_OP_DISJOINT_OVER_REVERSE	= 0x14,
-    PIXMAN_OP_DISJOINT_IN		= 0x15,
-    PIXMAN_OP_DISJOINT_IN_REVERSE	= 0x16,
-    PIXMAN_OP_DISJOINT_OUT		= 0x17,
-    PIXMAN_OP_DISJOINT_OUT_REVERSE	= 0x18,
-    PIXMAN_OP_DISJOINT_ATOP		= 0x19,
-    PIXMAN_OP_DISJOINT_ATOP_REVERSE	= 0x1a,
-    PIXMAN_OP_DISJOINT_XOR		= 0x1b,
-
-    PIXMAN_OP_CONJOINT_CLEAR		= 0x20,
-    PIXMAN_OP_CONJOINT_SRC		= 0x21,
-    PIXMAN_OP_CONJOINT_DST		= 0x22,
-    PIXMAN_OP_CONJOINT_OVER		= 0x23,
-    PIXMAN_OP_CONJOINT_OVER_REVERSE	= 0x24,
-    PIXMAN_OP_CONJOINT_IN		= 0x25,
-    PIXMAN_OP_CONJOINT_IN_REVERSE	= 0x26,
-    PIXMAN_OP_CONJOINT_OUT		= 0x27,
-    PIXMAN_OP_CONJOINT_OUT_REVERSE	= 0x28,
-    PIXMAN_OP_CONJOINT_ATOP		= 0x29,
-    PIXMAN_OP_CONJOINT_ATOP_REVERSE	= 0x2a,
-    PIXMAN_OP_CONJOINT_XOR		= 0x2b,
-
-    PIXMAN_OP_MULTIPLY                  = 0x30,
-    PIXMAN_OP_SCREEN                    = 0x31,
-    PIXMAN_OP_OVERLAY                   = 0x32,
-    PIXMAN_OP_DARKEN                    = 0x33,
-    PIXMAN_OP_LIGHTEN                   = 0x34,
-    PIXMAN_OP_COLOR_DODGE               = 0x35,
-    PIXMAN_OP_COLOR_BURN                = 0x36,
-    PIXMAN_OP_HARD_LIGHT                = 0x37,
-    PIXMAN_OP_SOFT_LIGHT                = 0x38,
-    PIXMAN_OP_DIFFERENCE                = 0x39,
-    PIXMAN_OP_EXCLUSION                 = 0x3a,
-    PIXMAN_OP_HSL_HUE			= 0x3b,
-    PIXMAN_OP_HSL_SATURATION		= 0x3c,
-    PIXMAN_OP_HSL_COLOR			= 0x3d,
-    PIXMAN_OP_HSL_LUMINOSITY		= 0x3e
-
-#ifdef PIXMAN_USE_INTERNAL_API
-    ,
-    PIXMAN_N_OPERATORS,
-    PIXMAN_OP_NONE = PIXMAN_N_OPERATORS
-#endif
-} pixman_op_t;
-
-/*
- * Regions
- */
-typedef struct pixman_region16_data	pixman_region16_data_t;
-typedef struct pixman_box16		pixman_box16_t;
-typedef struct pixman_rectangle16	pixman_rectangle16_t;
-typedef struct pixman_region16		pixman_region16_t;
-
-struct pixman_region16_data {
-    long		size;
-    long		numRects;
-/*  pixman_box16_t	rects[size];   in memory but not explicitly declared */
-};
-
-struct pixman_rectangle16
-{
-    int16_t	x, y;
-    uint16_t	width, height;
-};
-
-struct pixman_box16
-{
-    int16_t x1, y1, x2, y2;
-};
-
-struct pixman_region16
-{
-    pixman_box16_t          extents;
-    pixman_region16_data_t *data;
-};
-
-typedef enum
-{
-    PIXMAN_REGION_OUT,
-    PIXMAN_REGION_IN,
-    PIXMAN_REGION_PART
-} pixman_region_overlap_t;
-
-/* This function exists only to make it possible to preserve
- * the X ABI - it should go away at first opportunity.
- */
-PIXMAN_API
-void pixman_region_set_static_pointers (pixman_box16_t         *empty_box,
-					pixman_region16_data_t *empty_data,
-					pixman_region16_data_t *broken_data);
-
-/* creation/destruction */
-PIXMAN_API
-void                    pixman_region_init               (pixman_region16_t *region);
-
-PIXMAN_API
-void                    pixman_region_init_rect          (pixman_region16_t *region,
-							  int                x,
-							  int                y,
-							  unsigned int       width,
-							  unsigned int       height);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_init_rects         (pixman_region16_t *region,
-							  const pixman_box16_t *boxes,
-							  int                count);
-
-PIXMAN_API
-void                    pixman_region_init_with_extents  (pixman_region16_t    *region,
-							  const pixman_box16_t *extents);
-
-PIXMAN_API
-void                    pixman_region_init_from_image    (pixman_region16_t *region,
-							  pixman_image_t    *image);
-
-PIXMAN_API
-void                    pixman_region_fini               (pixman_region16_t *region);
-
-
-/* manipulation */
-PIXMAN_API
-void                    pixman_region_translate          (pixman_region16_t *region,
-							  int                x,
-							  int                y);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_copy               (pixman_region16_t       *dest,
-							  const pixman_region16_t *source);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_intersect          (pixman_region16_t       *new_reg,
-							  const pixman_region16_t *reg1,
-							  const pixman_region16_t *reg2);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_union              (pixman_region16_t       *new_reg,
-							  const pixman_region16_t *reg1,
-							  const pixman_region16_t *reg2);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_union_rect         (pixman_region16_t       *dest,
-							  const pixman_region16_t *source,
-							  int                     x,
-							  int                     y,
-							  unsigned int            width,
-							  unsigned int            height);
-
-PIXMAN_API
-pixman_bool_t		pixman_region_intersect_rect     (pixman_region16_t       *dest,
-							  const pixman_region16_t *source,
-							  int                      x,
-							  int                      y,
-							  unsigned int             width,
-							  unsigned int             height);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_subtract           (pixman_region16_t       *reg_d,
-							  const pixman_region16_t *reg_m,
-							  const pixman_region16_t *reg_s);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_inverse            (pixman_region16_t       *new_reg,
-							  const pixman_region16_t *reg1,
-							  const pixman_box16_t    *inv_rect);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_contains_point     (const pixman_region16_t *region,
-							  int                      x,
-							  int                      y,
-							  pixman_box16_t          *box);
-
-PIXMAN_API
-pixman_region_overlap_t pixman_region_contains_rectangle (const pixman_region16_t *region,
-							  const pixman_box16_t    *prect);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_empty              (const pixman_region16_t *region);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_not_empty          (const pixman_region16_t *region);
-
-PIXMAN_API
-pixman_box16_t *        pixman_region_extents            (const pixman_region16_t *region);
-
-PIXMAN_API
-int                     pixman_region_n_rects            (const pixman_region16_t *region);
-
-PIXMAN_API
-pixman_box16_t *        pixman_region_rectangles         (const pixman_region16_t *region,
-							  int                     *n_rects);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_equal              (const pixman_region16_t *region1,
-							  const pixman_region16_t *region2);
-
-PIXMAN_API
-pixman_bool_t           pixman_region_selfcheck          (pixman_region16_t *region);
-
-PIXMAN_API
-void                    pixman_region_reset              (pixman_region16_t       *region,
-							  const pixman_box16_t    *box);
-
-PIXMAN_API
-void			pixman_region_clear		 (pixman_region16_t *region);
-/*
- * 32 bit regions
- */
-typedef struct pixman_region32_data	pixman_region32_data_t;
-typedef struct pixman_box32		pixman_box32_t;
-typedef struct pixman_rectangle32	pixman_rectangle32_t;
-typedef struct pixman_region32		pixman_region32_t;
-
-struct pixman_region32_data {
-    long		size;
-    long		numRects;
-/*  pixman_box32_t	rects[size];   in memory but not explicitly declared */
-};
-
-struct pixman_rectangle32
-{
-    int32_t x, y;
-    uint32_t width, height;
-};
-
-struct pixman_box32
-{
-    int32_t x1, y1, x2, y2;
-};
-
-struct pixman_region32
-{
-    pixman_box32_t          extents;
-    pixman_region32_data_t  *data;
-};
-
-/* creation/destruction */
-PIXMAN_API
-void                    pixman_region32_init               (pixman_region32_t *region);
-
-PIXMAN_API
-void                    pixman_region32_init_rect          (pixman_region32_t *region,
-							    int                x,
-							    int                y,
-							    unsigned int       width,
-							    unsigned int       height);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_init_rects         (pixman_region32_t *region,
-							    const pixman_box32_t *boxes,
-							    int                count);
-
-PIXMAN_API
-void                    pixman_region32_init_with_extents  (pixman_region32_t    *region,
-							    const pixman_box32_t *extents);
-
-PIXMAN_API
-void                    pixman_region32_init_from_image    (pixman_region32_t *region,
-							    pixman_image_t    *image);
-
-PIXMAN_API
-void                    pixman_region32_fini               (pixman_region32_t *region);
-
-
-/* manipulation */
-PIXMAN_API
-void                    pixman_region32_translate          (pixman_region32_t *region,
-							    int                x,
-							    int                y);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_copy               (pixman_region32_t       *dest,
-							    const pixman_region32_t *source);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_intersect          (pixman_region32_t       *new_reg,
-							    const pixman_region32_t *reg1,
-							    const pixman_region32_t *reg2);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_union              (pixman_region32_t       *new_reg,
-							    const pixman_region32_t *reg1,
-							    const pixman_region32_t *reg2);
-
-PIXMAN_API
-pixman_bool_t		pixman_region32_intersect_rect     (pixman_region32_t       *dest,
-							    const pixman_region32_t *source,
-							    int                      x,
-							    int                      y,
-							    unsigned int             width,
-							    unsigned int             height);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_union_rect         (pixman_region32_t       *dest,
-							    const pixman_region32_t *source,
-							    int                      x,
-							    int                      y,
-							    unsigned int             width,
-							    unsigned int             height);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_subtract           (pixman_region32_t       *reg_d,
-							    const pixman_region32_t *reg_m,
-							    const pixman_region32_t *reg_s);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_inverse            (pixman_region32_t       *new_reg,
-							    const pixman_region32_t *reg1,
-							    const pixman_box32_t    *inv_rect);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_contains_point     (const pixman_region32_t *region,
-							    int                      x,
-							    int                      y,
-							    pixman_box32_t          *box);
-
-PIXMAN_API
-pixman_region_overlap_t pixman_region32_contains_rectangle (const pixman_region32_t *region,
-							    const pixman_box32_t    *prect);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_empty              (const pixman_region32_t *region);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_not_empty          (const pixman_region32_t *region);
-
-PIXMAN_API
-pixman_box32_t *        pixman_region32_extents            (const pixman_region32_t *region);
-
-PIXMAN_API
-int                     pixman_region32_n_rects            (const pixman_region32_t *region);
-
-PIXMAN_API
-pixman_box32_t *        pixman_region32_rectangles         (const pixman_region32_t *region,
-							    int                     *n_rects);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_equal              (const pixman_region32_t *region1,
-							    const pixman_region32_t *region2);
-
-PIXMAN_API
-pixman_bool_t           pixman_region32_selfcheck          (pixman_region32_t *region);
-
-PIXMAN_API
-void                    pixman_region32_reset              (pixman_region32_t    *region,
-							    const pixman_box32_t *box);
-
-PIXMAN_API
-void			pixman_region32_clear		   (pixman_region32_t *region);
-
-
-/* Copy / Fill / Misc */
-PIXMAN_API
-pixman_bool_t pixman_blt                (uint32_t           *src_bits,
-					 uint32_t           *dst_bits,
-					 int                 src_stride,
-					 int                 dst_stride,
-					 int                 src_bpp,
-					 int                 dst_bpp,
-					 int                 src_x,
-					 int                 src_y,
-					 int                 dest_x,
-					 int                 dest_y,
-					 int                 width,
-					 int                 height);
-
-PIXMAN_API
-pixman_bool_t pixman_fill               (uint32_t           *bits,
-					 int                 stride,
-					 int                 bpp,
-					 int                 x,
-					 int                 y,
-					 int                 width,
-					 int                 height,
-					 uint32_t            _xor);
-
-
-PIXMAN_API
-int           pixman_version            (void);
-
-PIXMAN_API
-const char*   pixman_version_string     (void);
-
-/*
- * Images
- */
-typedef struct pixman_indexed		pixman_indexed_t;
-typedef struct pixman_gradient_stop	pixman_gradient_stop_t;
-
-typedef uint32_t (* pixman_read_memory_func_t) (const void *src, int size);
-typedef void     (* pixman_write_memory_func_t) (void *dst, uint32_t value, int size);
-
-typedef void     (* pixman_image_destroy_func_t) (pixman_image_t *image, void *data);
-
-struct pixman_gradient_stop {
-    pixman_fixed_t x;
-    pixman_color_t color;
-};
-
-#define PIXMAN_MAX_INDEXED  256 /* XXX depth must be <= 8 */
-
-#if PIXMAN_MAX_INDEXED <= 256
-typedef uint8_t pixman_index_type;
-#endif
-
-struct pixman_indexed
-{
-    pixman_bool_t       color;
-    uint32_t		rgba[PIXMAN_MAX_INDEXED];
-    pixman_index_type	ent[32768];
-};
-
-/*
- * While the protocol is generous in format support, the
- * sample implementation allows only packed RGB and GBR
- * representations for data to simplify software rendering,
- */
-#define PIXMAN_FORMAT(bpp,type,a,r,g,b)	(((bpp) << 24) |  \
-					 ((type) << 16) | \
-					 ((a) << 12) |	  \
-					 ((r) << 8) |	  \
-					 ((g) << 4) |	  \
-					 ((b)))
-
-#define PIXMAN_FORMAT_BYTE(bpp,type,a,r,g,b) \
-	(((bpp >> 3) << 24) | \
-	(3 << 22) | ((type) << 16) | \
-	((a >> 3) << 12) | \
-	((r >> 3) << 8) | \
-	((g >> 3) << 4) | \
-	((b >> 3)))
-
-#define PIXMAN_FORMAT_RESHIFT(val, ofs, num) \
-	(((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3))
-
-#define PIXMAN_FORMAT_BPP(f)	PIXMAN_FORMAT_RESHIFT(f, 24, 8)
-#define PIXMAN_FORMAT_SHIFT(f)	((uint32_t)((f >> 22) & 3))
-#define PIXMAN_FORMAT_TYPE(f)	(((f) >> 16) & 0x3f)
-#define PIXMAN_FORMAT_A(f)	PIXMAN_FORMAT_RESHIFT(f, 12, 4)
-#define PIXMAN_FORMAT_R(f)	PIXMAN_FORMAT_RESHIFT(f, 8, 4)
-#define PIXMAN_FORMAT_G(f)	PIXMAN_FORMAT_RESHIFT(f, 4, 4)
-#define PIXMAN_FORMAT_B(f)	PIXMAN_FORMAT_RESHIFT(f, 0, 4)
-#define PIXMAN_FORMAT_RGB(f)	(((f)      ) & 0xfff)
-#define PIXMAN_FORMAT_VIS(f)	(((f)      ) & 0xffff)
-#define PIXMAN_FORMAT_DEPTH(f)	(PIXMAN_FORMAT_A(f) +	\
-				 PIXMAN_FORMAT_R(f) +	\
-				 PIXMAN_FORMAT_G(f) +	\
-				 PIXMAN_FORMAT_B(f))
-
-#define PIXMAN_TYPE_OTHER	0
-#define PIXMAN_TYPE_A		1
-#define PIXMAN_TYPE_ARGB	2
-#define PIXMAN_TYPE_ABGR	3
-#define PIXMAN_TYPE_COLOR	4
-#define PIXMAN_TYPE_GRAY	5
-#define PIXMAN_TYPE_YUY2	6
-#define PIXMAN_TYPE_YV12	7
-#define PIXMAN_TYPE_BGRA	8
-#define PIXMAN_TYPE_RGBA	9
-#define PIXMAN_TYPE_ARGB_SRGB	10
-#define PIXMAN_TYPE_RGBA_FLOAT	11
-
-#define PIXMAN_FORMAT_COLOR(f)				\
-	(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB ||	\
-	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR ||	\
-	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA ||	\
-	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA ||	\
-	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA_FLOAT)
-
-typedef enum {
-/* 128bpp formats */
-    PIXMAN_rgba_float =	PIXMAN_FORMAT_BYTE(128,PIXMAN_TYPE_RGBA_FLOAT,32,32,32,32),
-/* 96bpp formats */
-    PIXMAN_rgb_float =	PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32),
-
-/* 32bpp formats */
-    PIXMAN_a8r8g8b8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
-    PIXMAN_x8r8g8b8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
-    PIXMAN_a8b8g8r8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
-    PIXMAN_x8b8g8r8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
-    PIXMAN_b8g8r8a8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
-    PIXMAN_b8g8r8x8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
-    PIXMAN_r8g8b8a8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
-    PIXMAN_r8g8b8x8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
-    PIXMAN_x14r6g6b6 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
-    PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
-    PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
-    PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
-    PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
-
-/* sRGB formats */
-    PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
-    PIXMAN_r8g8b8_sRGB = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB_SRGB,0,8,8,8),
-
-/* 24bpp formats */
-    PIXMAN_r8g8b8 =	 PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
-    PIXMAN_b8g8r8 =	 PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
-
-/* 16bpp formats */
-    PIXMAN_r5g6b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
-    PIXMAN_b5g6r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
-
-    PIXMAN_a1r5g5b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
-    PIXMAN_x1r5g5b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
-    PIXMAN_a1b5g5r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
-    PIXMAN_x1b5g5r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
-    PIXMAN_a4r4g4b4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
-    PIXMAN_x4r4g4b4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
-    PIXMAN_a4b4g4r4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
-    PIXMAN_x4b4g4r4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
-
-/* 8bpp formats */
-    PIXMAN_a8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
-    PIXMAN_r3g3b2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
-    PIXMAN_b2g3r3 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
-    PIXMAN_a2r2g2b2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
-    PIXMAN_a2b2g2r2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
-
-    PIXMAN_c8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_g8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
-
-    PIXMAN_x4a4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
-
-    PIXMAN_x4c4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_x4g4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
-
-/* 4bpp formats */
-    PIXMAN_a4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
-    PIXMAN_r1g2b1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
-    PIXMAN_b1g2r1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
-    PIXMAN_a1r1g1b1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
-    PIXMAN_a1b1g1r1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
-
-    PIXMAN_c4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_g4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
-
-/* 1bpp formats */
-    PIXMAN_a1 =		 PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
-
-    PIXMAN_g1 =		 PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
-
-/* YUV formats */
-    PIXMAN_yuy2 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
-    PIXMAN_yv12 =	 PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
-} pixman_format_code_t;
-
-/* Querying supported format values. */
-PIXMAN_API
-pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
-
-PIXMAN_API
-pixman_bool_t pixman_format_supported_source      (pixman_format_code_t format);
-
-/* Constructors */
-PIXMAN_API
-pixman_image_t *pixman_image_create_solid_fill       (const pixman_color_t         *color);
-
-PIXMAN_API
-pixman_image_t *pixman_image_create_linear_gradient  (const pixman_point_fixed_t   *p1,
-						      const pixman_point_fixed_t   *p2,
-						      const pixman_gradient_stop_t *stops,
-						      int                           n_stops);
-
-PIXMAN_API
-pixman_image_t *pixman_image_create_radial_gradient  (const pixman_point_fixed_t   *inner,
-						      const pixman_point_fixed_t   *outer,
-						      pixman_fixed_t                inner_radius,
-						      pixman_fixed_t                outer_radius,
-						      const pixman_gradient_stop_t *stops,
-						      int                           n_stops);
-
-PIXMAN_API
-pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t   *center,
-						      pixman_fixed_t                angle,
-						      const pixman_gradient_stop_t *stops,
-						      int                           n_stops);
-
-PIXMAN_API
-pixman_image_t *pixman_image_create_bits             (pixman_format_code_t          format,
-						      int                           width,
-						      int                           height,
-						      uint32_t                     *bits,
-						      int                           rowstride_bytes);
-
-PIXMAN_API
-pixman_image_t *pixman_image_create_bits_no_clear    (pixman_format_code_t format,
-						      int                  width,
-						      int                  height,
-						      uint32_t *           bits,
-						      int                  rowstride_bytes);
-
-/* Destructor */
-PIXMAN_API
-pixman_image_t *pixman_image_ref                     (pixman_image_t               *image);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_unref                   (pixman_image_t               *image);
-
-
-PIXMAN_API
-void		pixman_image_set_destroy_function    (pixman_image_t		   *image,
-						      pixman_image_destroy_func_t   function,
-						      void			   *data);
-
-PIXMAN_API
-void *		pixman_image_get_destroy_data        (pixman_image_t		   *image);
-
-/* Set properties */
-PIXMAN_API
-pixman_bool_t   pixman_image_set_clip_region         (pixman_image_t               *image,
-						      const pixman_region16_t            *region);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_set_clip_region32       (pixman_image_t               *image,
-						      const pixman_region32_t            *region);
-
-PIXMAN_API
-void		pixman_image_set_has_client_clip     (pixman_image_t               *image,
-						      pixman_bool_t		    clien_clip);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_set_transform           (pixman_image_t               *image,
-						      const pixman_transform_t     *transform);
-
-PIXMAN_API
-void            pixman_image_set_repeat              (pixman_image_t               *image,
-						      pixman_repeat_t               repeat);
-
-PIXMAN_API
-void            pixman_image_set_dither              (pixman_image_t               *image,
-						      pixman_dither_t               dither);
-
-PIXMAN_API
-void            pixman_image_set_dither_offset       (pixman_image_t               *image,
-						      int                           offset_x,
-						      int                           offset_y);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_set_filter              (pixman_image_t               *image,
-						      pixman_filter_t               filter,
-						      const pixman_fixed_t         *filter_params,
-						      int                           n_filter_params);
-
-PIXMAN_API
-void		pixman_image_set_source_clipping     (pixman_image_t		   *image,
-						      pixman_bool_t                 source_clipping);
-
-PIXMAN_API
-void            pixman_image_set_alpha_map           (pixman_image_t               *image,
-						      pixman_image_t               *alpha_map,
-						      int16_t                       x,
-						      int16_t                       y);
-
-PIXMAN_API
-void            pixman_image_set_component_alpha     (pixman_image_t               *image,
-						      pixman_bool_t                 component_alpha);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_get_component_alpha     (pixman_image_t               *image);
-
-PIXMAN_API
-void		pixman_image_set_accessors	     (pixman_image_t		   *image,
-						      pixman_read_memory_func_t	    read_func,
-						      pixman_write_memory_func_t    write_func);
-
-PIXMAN_API
-void		pixman_image_set_indexed	     (pixman_image_t		   *image,
-						      const pixman_indexed_t	   *indexed);
-
-PIXMAN_API
-uint32_t       *pixman_image_get_data                (pixman_image_t               *image);
-
-PIXMAN_API
-int		pixman_image_get_width               (pixman_image_t               *image);
-
-PIXMAN_API
-int             pixman_image_get_height              (pixman_image_t               *image);
-
-PIXMAN_API
-int		pixman_image_get_stride              (pixman_image_t               *image); /* in bytes */
-
-PIXMAN_API
-int		pixman_image_get_depth               (pixman_image_t		   *image);
-
-PIXMAN_API
-pixman_format_code_t pixman_image_get_format	     (pixman_image_t		   *image);
-
-typedef enum
-{
-    PIXMAN_KERNEL_IMPULSE,
-    PIXMAN_KERNEL_BOX,
-    PIXMAN_KERNEL_LINEAR,
-    PIXMAN_KERNEL_CUBIC,
-    PIXMAN_KERNEL_GAUSSIAN,
-    PIXMAN_KERNEL_LANCZOS2,
-    PIXMAN_KERNEL_LANCZOS3,
-    PIXMAN_KERNEL_LANCZOS3_STRETCHED       /* Jim Blinn's 'nice' filter */
-} pixman_kernel_t;
-
-/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
- * with the given kernels and scale parameters.
- */
-PIXMAN_API
-pixman_fixed_t *
-pixman_filter_create_separable_convolution (int             *n_values,
-					    pixman_fixed_t   scale_x,
-					    pixman_fixed_t   scale_y,
-					    pixman_kernel_t  reconstruct_x,
-					    pixman_kernel_t  reconstruct_y,
-					    pixman_kernel_t  sample_x,
-					    pixman_kernel_t  sample_y,
-					    int              subsample_bits_x,
-					    int              subsample_bits_y);
-
-
-PIXMAN_API
-pixman_bool_t	pixman_image_fill_rectangles	     (pixman_op_t		    op,
-						      pixman_image_t		   *image,
-						      const pixman_color_t	   *color,
-						      int			    n_rects,
-						      const pixman_rectangle16_t   *rects);
-
-PIXMAN_API
-pixman_bool_t   pixman_image_fill_boxes              (pixman_op_t                   op,
-                                                      pixman_image_t               *dest,
-                                                      const pixman_color_t         *color,
-                                                      int                           n_boxes,
-                                                      const pixman_box32_t         *boxes);
-
-/* Composite */
-PIXMAN_API
-pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
-					       pixman_image_t    *src_image,
-					       pixman_image_t    *mask_image,
-					       pixman_image_t    *dest_image,
-					       int16_t            src_x,
-					       int16_t            src_y,
-					       int16_t            mask_x,
-					       int16_t            mask_y,
-					       int16_t            dest_x,
-					       int16_t            dest_y,
-					       uint16_t           width,
-					       uint16_t           height);
-
-PIXMAN_API
-void          pixman_image_composite          (pixman_op_t        op,
-					       pixman_image_t    *src,
-					       pixman_image_t    *mask,
-					       pixman_image_t    *dest,
-					       int16_t            src_x,
-					       int16_t            src_y,
-					       int16_t            mask_x,
-					       int16_t            mask_y,
-					       int16_t            dest_x,
-					       int16_t            dest_y,
-					       uint16_t           width,
-					       uint16_t           height);
-
-PIXMAN_API
-void          pixman_image_composite32        (pixman_op_t        op,
-					       pixman_image_t    *src,
-					       pixman_image_t    *mask,
-					       pixman_image_t    *dest,
-					       int32_t            src_x,
-					       int32_t            src_y,
-					       int32_t            mask_x,
-					       int32_t            mask_y,
-					       int32_t            dest_x,
-					       int32_t            dest_y,
-					       int32_t            width,
-					       int32_t            height);
-
-/* Executive Summary: This function is a no-op that only exists
- * for historical reasons.
- *
- * There used to be a bug in the X server where it would rely on
- * out-of-bounds accesses when it was asked to composite with a
- * window as the source. It would create a pixman image pointing
- * to some bogus position in memory, but then set a clip region
- * to the position where the actual bits were.
- *
- * Due to a bug in old versions of pixman, where it would not clip
- * against the image bounds when a clip region was set, this would
- * actually work. So when the pixman bug was fixed, a workaround was
- * added to allow certain out-of-bound accesses. This function disabled
- * those workarounds.
- *
- * Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
- * function is a no-op.
- */
-PIXMAN_API
-void pixman_disable_out_of_bounds_workaround (void);
-
-/*
- * Glyphs
- */
-typedef struct pixman_glyph_cache_t pixman_glyph_cache_t;
-typedef struct
-{
-    int		x, y;
-    const void *glyph;
-} pixman_glyph_t;
-
-PIXMAN_API
-pixman_glyph_cache_t *pixman_glyph_cache_create       (void);
-
-PIXMAN_API
-void                  pixman_glyph_cache_destroy      (pixman_glyph_cache_t *cache);
-
-PIXMAN_API
-void                  pixman_glyph_cache_freeze       (pixman_glyph_cache_t *cache);
-
-PIXMAN_API
-void                  pixman_glyph_cache_thaw         (pixman_glyph_cache_t *cache);
-
-PIXMAN_API
-const void *          pixman_glyph_cache_lookup       (pixman_glyph_cache_t *cache,
-						       void                 *font_key,
-						       void                 *glyph_key);
-
-PIXMAN_API
-const void *          pixman_glyph_cache_insert       (pixman_glyph_cache_t *cache,
-						       void                 *font_key,
-						       void                 *glyph_key,
-						       int		     origin_x,
-						       int                   origin_y,
-						       pixman_image_t       *glyph_image);
-
-PIXMAN_API
-void                  pixman_glyph_cache_remove       (pixman_glyph_cache_t *cache,
-						       void                 *font_key,
-						       void                 *glyph_key);
-
-PIXMAN_API
-void                  pixman_glyph_get_extents        (pixman_glyph_cache_t *cache,
-						       int                   n_glyphs,
-						       pixman_glyph_t       *glyphs,
-						       pixman_box32_t       *extents);
-
-PIXMAN_API
-pixman_format_code_t  pixman_glyph_get_mask_format    (pixman_glyph_cache_t *cache,
-						       int		     n_glyphs,
-						       const pixman_glyph_t *glyphs);
-
-PIXMAN_API
-void                  pixman_composite_glyphs         (pixman_op_t           op,
-						       pixman_image_t       *src,
-						       pixman_image_t       *dest,
-						       pixman_format_code_t  mask_format,
-						       int32_t               src_x,
-						       int32_t               src_y,
-						       int32_t		     mask_x,
-						       int32_t		     mask_y,
-						       int32_t               dest_x,
-						       int32_t               dest_y,
-						       int32_t		     width,
-						       int32_t		     height,
-						       pixman_glyph_cache_t *cache,
-						       int		     n_glyphs,
-						       const pixman_glyph_t *glyphs);
-
-PIXMAN_API
-void                  pixman_composite_glyphs_no_mask (pixman_op_t           op,
-						       pixman_image_t       *src,
-						       pixman_image_t       *dest,
-						       int32_t               src_x,
-						       int32_t               src_y,
-						       int32_t               dest_x,
-						       int32_t               dest_y,
-						       pixman_glyph_cache_t *cache,
-						       int		     n_glyphs,
-						       const pixman_glyph_t *glyphs);
-
-/*
- * Trapezoids
- */
-typedef struct pixman_edge pixman_edge_t;
-typedef struct pixman_trapezoid pixman_trapezoid_t;
-typedef struct pixman_trap pixman_trap_t;
-typedef struct pixman_span_fix pixman_span_fix_t;
-typedef struct pixman_triangle pixman_triangle_t;
-
-/*
- * An edge structure.  This represents a single polygon edge
- * and can be quickly stepped across small or large gaps in the
- * sample grid
- */
-struct pixman_edge
-{
-    pixman_fixed_t	x;
-    pixman_fixed_t	e;
-    pixman_fixed_t	stepx;
-    pixman_fixed_t	signdx;
-    pixman_fixed_t	dy;
-    pixman_fixed_t	dx;
-
-    pixman_fixed_t	stepx_small;
-    pixman_fixed_t	stepx_big;
-    pixman_fixed_t	dx_small;
-    pixman_fixed_t	dx_big;
-};
-
-struct pixman_trapezoid
-{
-    pixman_fixed_t	top, bottom;
-    pixman_line_fixed_t	left, right;
-};
-
-struct pixman_triangle
-{
-    pixman_point_fixed_t p1, p2, p3;
-};
-
-/* whether 't' is a well defined not obviously empty trapezoid */
-#define pixman_trapezoid_valid(t)				   \
-    ((t)->left.p1.y != (t)->left.p2.y &&			   \
-     (t)->right.p1.y != (t)->right.p2.y &&			   \
-     ((t)->bottom > (t)->top))
-
-struct pixman_span_fix
-{
-    pixman_fixed_t	l, r, y;
-};
-
-struct pixman_trap
-{
-    pixman_span_fix_t	top, bot;
-};
-
-PIXMAN_API
-pixman_fixed_t pixman_sample_ceil_y        (pixman_fixed_t             y,
-					    int                        bpp);
-
-PIXMAN_API
-pixman_fixed_t pixman_sample_floor_y       (pixman_fixed_t             y,
-					    int                        bpp);
-
-PIXMAN_API
-void           pixman_edge_step            (pixman_edge_t             *e,
-					    int                        n);
-
-PIXMAN_API
-void           pixman_edge_init            (pixman_edge_t             *e,
-					    int                        bpp,
-					    pixman_fixed_t             y_start,
-					    pixman_fixed_t             x_top,
-					    pixman_fixed_t             y_top,
-					    pixman_fixed_t             x_bot,
-					    pixman_fixed_t             y_bot);
-
-PIXMAN_API
-void           pixman_line_fixed_edge_init (pixman_edge_t             *e,
-					    int                        bpp,
-					    pixman_fixed_t             y,
-					    const pixman_line_fixed_t *line,
-					    int                        x_off,
-					    int                        y_off);
-
-PIXMAN_API
-void           pixman_rasterize_edges      (pixman_image_t            *image,
-					    pixman_edge_t             *l,
-					    pixman_edge_t             *r,
-					    pixman_fixed_t             t,
-					    pixman_fixed_t             b);
-
-PIXMAN_API
-void           pixman_add_traps            (pixman_image_t            *image,
-					    int16_t                    x_off,
-					    int16_t                    y_off,
-					    int                        ntrap,
-					    const pixman_trap_t       *traps);
-
-PIXMAN_API
-void           pixman_add_trapezoids       (pixman_image_t            *image,
-					    int16_t                    x_off,
-					    int                        y_off,
-					    int                        ntraps,
-					    const pixman_trapezoid_t  *traps);
-
-PIXMAN_API
-void           pixman_rasterize_trapezoid  (pixman_image_t            *image,
-					    const pixman_trapezoid_t  *trap,
-					    int                        x_off,
-					    int                        y_off);
-
-PIXMAN_API
-void          pixman_composite_trapezoids (pixman_op_t		       op,
-					   pixman_image_t *	       src,
-					   pixman_image_t *	       dst,
-					   pixman_format_code_t	       mask_format,
-					   int			       x_src,
-					   int			       y_src,
-					   int			       x_dst,
-					   int			       y_dst,
-					   int			       n_traps,
-					   const pixman_trapezoid_t *  traps);
-
-PIXMAN_API
-void          pixman_composite_triangles (pixman_op_t		       op,
-					  pixman_image_t *	       src,
-					  pixman_image_t *	       dst,
-					  pixman_format_code_t	       mask_format,
-					  int			       x_src,
-					  int			       y_src,
-					  int			       x_dst,
-					  int			       y_dst,
-					  int			       n_tris,
-					  const pixman_triangle_t *    tris);
-
-PIXMAN_API
-void	      pixman_add_triangles       (pixman_image_t              *image,
-					  int32_t	               x_off,
-					  int32_t	               y_off,
-					  int	                       n_tris,
-					  const pixman_triangle_t     *tris);
-
-PIXMAN_END_DECLS
-
-#endif /* PIXMAN_H__ */
diff --git a/vendor/pixman/pixman/rounding.txt b/vendor/pixman/pixman/rounding.txt
deleted file mode 100644
index 1c00019b6..000000000
--- a/vendor/pixman/pixman/rounding.txt
+++ /dev/null
@@ -1,168 +0,0 @@
-*** General notes about rounding
-
-Suppose a function is sampled at positions [k + o] where k is an
-integer and o is a fractional offset 0 <= o < 1.
-
-To round a value to the nearest sample, breaking ties by rounding up,
-we can do this:
-
-   round(x) = floor(x - o + 0.5) + o
-
-That is, first subtract o to let us pretend that the samples are at
-integer coordinates, then add 0.5 and floor to round to nearest
-integer, then add the offset back in.
-
-To break ties by rounding down:
-
-    round(x) = ceil(x - o - 0.5) + o
-
-or if we have an epsilon value:
-
-    round(x) = floor(x - o + 0.5 - e) + o
-
-To always round *up* to the next sample:
-
-    round_up(x) = ceil(x - o) + o
-
-To always round *down* to the previous sample:
-
-    round_down(x) = floor(x - o) + o
-
-If a set of samples is stored in an array, you get from the sample
-position to an index by subtracting the position of the first sample
-in the array:
-
-    index(s) = s - first_sample
-
-
-*** Application to pixman
-
-In pixman, images are sampled with o = 0.5, that is, pixels are
-located midways between integers. We usually break ties by rounding
-down (i.e., "round towards north-west").
-
-
--- NEAREST filtering:
-
-The NEAREST filter simply picks the closest pixel to the given
-position:
-
-    round(x) = floor(x - 0.5 + 0.5 - e) + 0.5 = floor (x - e) + 0.5
-
-The first sample of a pixman image has position 0.5, so to find the
-index in the pixel array, we have to subtract 0.5:
-
-    floor (x - e) + 0.5 - 0.5 = floor (x - e).
-
-Therefore a 16.16 fixed-point image location is turned into a pixel
-value with NEAREST filtering by doing this:
-
-    pixels[((y - e) >> 16) * stride + ((x - e) >> 16)]
-
-where stride is the number of pixels allocated per scanline and e =
-0x0001.
-
-
--- CONVOLUTION filtering:
-
-A convolution matrix is considered a sampling of a function f at
-values surrounding 0. For example, this convolution matrix:
-
-	[a, b, c, d]
-
-is interpreted as the values of a function f:
-
-   	a = f(-1.5)
-        b = f(-0.5)
-        c = f(0.5)
-        d = f(1.5)
-
-The sample offset in this case is o = 0.5 and the first sample has
-position s0 = -1.5. If the matrix is:
-
-        [a, b, c, d, e]
-
-the sample offset is o = 0 and the first sample has position s0 =
--2.0. In general we have 
-
-      s0 = (- width / 2.0 + 0.5).
-
-and
-
-      o = frac (s0)
-
-To evaluate f at a position between the samples, we round to the
-closest sample, and then we subtract the position of the first sample
-to get the index in the matrix:
-
-	f(t) = matrix[floor(t - o + 0.5) + o - s0]
-
-Note that in this case we break ties by rounding up.
-
-If we write s0 = m + o, where m is an integer, this is equivalent to
-
-        f(t) = matrix[floor(t - o + 0.5) + o - (m + o)]
-	     = matrix[floor(t - o + 0.5 - m) + o - o]
-	     = matrix[floor(t - s0 + 0.5)]
-
-The convolution filter in pixman positions f such that 0 aligns with
-the given position x. For a given pixel x0 in the image, the closest
-sample of f is then computed by taking (x - x0) and rounding that to
-the closest index:
-
-	i = floor ((x0 - x) - s0 + 0.5)
-
-To perform the convolution, we have to find the first pixel x0 whose
-corresponding sample has index 0. We can write x0 = k + 0.5, where k
-is an integer:
-
-         0 = floor(k + 0.5 - x - s0 + 0.5)
-
-	   = k + floor(1 - x - s0)
-
-	   = k - ceil(x + s0 - 1)
-
-	   = k - floor(x + s0 - e)
-
-	   = k - floor(x - (width - 1) / 2.0 - e)
-
-And so the final formula for the index k of x0 in the image is:
-
-    	    k = floor(x - (width - 1) / 2.0 - e)
-
-Computing the result is then simply a matter of convolving all the
-pixels starting at k with all the samples in the matrix.
-
-
---- SEPARABLE_CONVOLUTION
-
-For this filter, x is first rounded to one of n regularly spaced
-subpixel positions. This subpixel position determines which of n
-convolution matrices is being used.
-
-Then, as in a regular convolution filter, the first pixel to be used
-is determined:
-
-    	k = floor (x - (width - 1) / 2.0 - e)
-
-and then the image pixels starting there are convolved with the chosen
-matrix. If we write x = xi + frac, where xi is an integer, we get
-
-	k = xi + floor (frac - (width - 1) / 2.0 - e)
-
-so the location of k relative to x is given by:
-
-    (k + 0.5 - x) = xi + floor (frac - (width - 1) / 2.0 - e) + 0.5 - x
-
-                  = floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac
-
-which means the contents of the matrix corresponding to (frac) should
-contain width samplings of the function, with the first sample at:
-
-       floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac
-     = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
-
-This filter is called separable because each of the k x k convolution
-matrices is specified with two k-wide vectors, one for each dimension,
-where each entry in the matrix is computed as the product of the
-corresponding entries in the vectors.
diff --git a/vendor/pixman/pixman/solaris-hwcap.mapfile b/vendor/pixman/pixman/solaris-hwcap.mapfile
deleted file mode 100644
index 87efce1e3..000000000
--- a/vendor/pixman/pixman/solaris-hwcap.mapfile
+++ /dev/null
@@ -1,30 +0,0 @@
-###############################################################################
-#
-# Copyright 2009, Oracle and/or its affiliates. All rights reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-#
-###############################################################################
-#
-# Override the linker's detection of CMOV/MMX/SSE instructions so this
-# library isn't flagged as only usable on CPU's with those ISA's, since it
-# checks at runtime for availability before calling them
-
-hwcap_1 = V0x0 FPU OVERRIDE;