From d38d0f30c4860dfae4091d4e064f9dcc804794b7 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Wed, 9 Oct 2024 19:31:13 -0400 Subject: [PATCH] font/sprite: replace pixman with z2d, extend Box coverage More complete coverage of the Symbols For Legacy Computing block, including characters from Unicode 16.0. Pixman and the web canvas impl for Canvas have been removed in favor of z2d for drawing, since it has a nicer API with more powerful methods, and is in Zig with no specific platform optimizations so should compile to wasm no problem. --- build.zig | 16 +- build.zig.zon | 5 +- nix/devShell.nix | 3 - nix/package.nix | 2 - pkg/pixman/build.zig | 122 - pkg/pixman/build.zig.zon | 13 - pkg/pixman/c.zig | 3 - pkg/pixman/error.zig | 4 - pkg/pixman/format.zig | 118 - pkg/pixman/image.zig | 211 - pkg/pixman/main.zig | 23 - pkg/pixman/pixman-version.h | 54 - pkg/pixman/types.zig | 131 - src/font/sprite.zig | 1 + src/font/sprite/Box.zig | 1120 ++- src/font/sprite/Face.zig | 80 +- src/font/sprite/Powerline.zig | 158 +- src/font/sprite/canvas.zig | 513 +- src/font/sprite/testdata/Box.ppm | Bin 1048593 -> 1048593 bytes src/font/sprite/underline.zig | 26 +- vendor/pixman/.editorconfig | 14 - vendor/pixman/.gitignore | 56 - vendor/pixman/.gitlab-ci.yml | 19 - vendor/pixman/AUTHORS | 0 vendor/pixman/CODING_STYLE | 199 - vendor/pixman/COPYING | 42 - vendor/pixman/ChangeLog | 0 vendor/pixman/INSTALL | 234 - vendor/pixman/Makefile.am | 143 - vendor/pixman/Makefile.win32 | 25 - vendor/pixman/Makefile.win32.common | 73 - vendor/pixman/NEWS | 0 vendor/pixman/README | 140 - vendor/pixman/RELEASING | 59 - vendor/pixman/a64-neon-test.S | 5 - vendor/pixman/arm-simd-test.S | 10 - vendor/pixman/autogen.sh | 14 - vendor/pixman/configure.ac | 1199 --- vendor/pixman/meson.build | 581 -- vendor/pixman/meson_options.txt | 128 - vendor/pixman/neon-test.S | 12 - vendor/pixman/pixman-1-uninstalled.pc.in | 5 - vendor/pixman/pixman-1.pc.in | 11 - vendor/pixman/pixman/Makefile.am | 158 - vendor/pixman/pixman/Makefile.sources | 43 - vendor/pixman/pixman/Makefile.win32 | 93 - .../pixman/pixman/dither/blue-noise-64x64.h | 77 - vendor/pixman/pixman/dither/make-blue-noise.c | 679 -- vendor/pixman/pixman/loongson-mmintrin.h | 412 -- vendor/pixman/pixman/make-srgb.pl | 115 - vendor/pixman/pixman/meson.build | 143 - .../pixman/pixman/pixman-access-accessors.c | 3 - vendor/pixman/pixman/pixman-access.c | 1715 ----- vendor/pixman/pixman/pixman-accessor.h | 25 - vendor/pixman/pixman/pixman-arm-asm.h | 37 - vendor/pixman/pixman/pixman-arm-common.h | 419 -- .../pixman/pixman/pixman-arm-detect-win32.asm | 21 - .../pixman/pixman-arm-neon-asm-bilinear.S | 1358 ---- vendor/pixman/pixman/pixman-arm-neon-asm.S | 3627 --------- vendor/pixman/pixman/pixman-arm-neon-asm.h | 1184 --- vendor/pixman/pixman/pixman-arm-neon.c | 493 -- .../pixman/pixman-arm-simd-asm-scaled.S | 156 - vendor/pixman/pixman/pixman-arm-simd-asm.S | 1179 --- vendor/pixman/pixman/pixman-arm-simd-asm.h | 966 --- vendor/pixman/pixman/pixman-arm-simd.c | 291 - vendor/pixman/pixman/pixman-arm.c | 256 - .../pixman/pixman-arma64-neon-asm-bilinear.S | 1275 ---- vendor/pixman/pixman/pixman-arma64-neon-asm.S | 3704 ---------- vendor/pixman/pixman/pixman-arma64-neon-asm.h | 1310 ---- vendor/pixman/pixman/pixman-bits-image.c | 1383 ---- vendor/pixman/pixman/pixman-combine-float.c | 1158 --- vendor/pixman/pixman/pixman-combine32.c | 1189 --- vendor/pixman/pixman/pixman-combine32.h | 272 - vendor/pixman/pixman/pixman-compiler.h | 234 - .../pixman/pixman/pixman-conical-gradient.c | 220 - vendor/pixman/pixman/pixman-edge-accessors.c | 4 - vendor/pixman/pixman/pixman-edge-imp.h | 182 - vendor/pixman/pixman/pixman-edge.c | 385 - vendor/pixman/pixman/pixman-fast-path.c | 3298 --------- vendor/pixman/pixman/pixman-filter.c | 491 -- vendor/pixman/pixman/pixman-general.c | 264 - vendor/pixman/pixman/pixman-glyph.c | 676 -- vendor/pixman/pixman/pixman-gradient-walker.c | 264 - vendor/pixman/pixman/pixman-image.c | 994 --- vendor/pixman/pixman/pixman-implementation.c | 417 -- vendor/pixman/pixman/pixman-inlines.h | 1365 ---- vendor/pixman/pixman/pixman-linear-gradient.c | 292 - vendor/pixman/pixman/pixman-matrix.c | 1073 --- vendor/pixman/pixman/pixman-mips-dspr2-asm.S | 4283 ----------- vendor/pixman/pixman/pixman-mips-dspr2-asm.h | 711 -- vendor/pixman/pixman/pixman-mips-dspr2.c | 459 -- vendor/pixman/pixman/pixman-mips-dspr2.h | 432 -- vendor/pixman/pixman/pixman-mips-memcpy-asm.S | 382 - vendor/pixman/pixman/pixman-mips.c | 94 - vendor/pixman/pixman/pixman-mmx.c | 4153 ----------- vendor/pixman/pixman/pixman-noop.c | 161 - vendor/pixman/pixman/pixman-ppc.c | 173 - vendor/pixman/pixman/pixman-private.h | 1193 --- vendor/pixman/pixman/pixman-radial-gradient.c | 509 -- vendor/pixman/pixman/pixman-region.c | 2800 ------- vendor/pixman/pixman/pixman-region16.c | 67 - vendor/pixman/pixman/pixman-region32.c | 47 - vendor/pixman/pixman/pixman-solid-fill.c | 67 - vendor/pixman/pixman/pixman-sse2.c | 6528 ----------------- vendor/pixman/pixman/pixman-ssse3.c | 351 - vendor/pixman/pixman/pixman-timer.c | 66 - vendor/pixman/pixman/pixman-trap.c | 711 -- vendor/pixman/pixman/pixman-utils.c | 330 - vendor/pixman/pixman/pixman-version.h.in | 54 - vendor/pixman/pixman/pixman-vmx.c | 3159 -------- vendor/pixman/pixman/pixman-x86.c | 249 - vendor/pixman/pixman/pixman.c | 1134 --- vendor/pixman/pixman/pixman.h | 1426 ---- vendor/pixman/pixman/rounding.txt | 168 - vendor/pixman/pixman/solaris-hwcap.mapfile | 30 - 115 files changed, 1029 insertions(+), 68180 deletions(-) delete mode 100644 pkg/pixman/build.zig delete mode 100644 pkg/pixman/build.zig.zon delete mode 100644 pkg/pixman/c.zig delete mode 100644 pkg/pixman/error.zig delete mode 100644 pkg/pixman/format.zig delete mode 100644 pkg/pixman/image.zig delete mode 100644 pkg/pixman/main.zig delete mode 100644 pkg/pixman/pixman-version.h delete mode 100644 pkg/pixman/types.zig delete mode 100644 vendor/pixman/.editorconfig delete mode 100644 vendor/pixman/.gitignore delete mode 100644 vendor/pixman/.gitlab-ci.yml delete mode 100644 vendor/pixman/AUTHORS delete mode 100644 vendor/pixman/CODING_STYLE delete mode 100644 vendor/pixman/COPYING delete mode 100644 vendor/pixman/ChangeLog delete mode 100644 vendor/pixman/INSTALL delete mode 100644 vendor/pixman/Makefile.am delete mode 100644 vendor/pixman/Makefile.win32 delete mode 100644 vendor/pixman/Makefile.win32.common delete mode 100644 vendor/pixman/NEWS delete mode 100644 vendor/pixman/README delete mode 100644 vendor/pixman/RELEASING delete mode 100644 vendor/pixman/a64-neon-test.S delete mode 100644 vendor/pixman/arm-simd-test.S delete mode 100755 vendor/pixman/autogen.sh delete mode 100644 vendor/pixman/configure.ac delete mode 100644 vendor/pixman/meson.build delete mode 100644 vendor/pixman/meson_options.txt delete mode 100644 vendor/pixman/neon-test.S delete mode 100644 vendor/pixman/pixman-1-uninstalled.pc.in delete mode 100644 vendor/pixman/pixman-1.pc.in delete mode 100644 vendor/pixman/pixman/Makefile.am delete mode 100644 vendor/pixman/pixman/Makefile.sources delete mode 100644 vendor/pixman/pixman/Makefile.win32 delete mode 100644 vendor/pixman/pixman/dither/blue-noise-64x64.h delete mode 100644 vendor/pixman/pixman/dither/make-blue-noise.c delete mode 100644 vendor/pixman/pixman/loongson-mmintrin.h delete mode 100644 vendor/pixman/pixman/make-srgb.pl delete mode 100644 vendor/pixman/pixman/meson.build delete mode 100644 vendor/pixman/pixman/pixman-access-accessors.c delete mode 100644 vendor/pixman/pixman/pixman-access.c delete mode 100644 vendor/pixman/pixman/pixman-accessor.h delete mode 100644 vendor/pixman/pixman/pixman-arm-asm.h delete mode 100644 vendor/pixman/pixman/pixman-arm-common.h delete mode 100644 vendor/pixman/pixman/pixman-arm-detect-win32.asm delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm.S delete mode 100644 vendor/pixman/pixman/pixman-arm-neon-asm.h delete mode 100644 vendor/pixman/pixman/pixman-arm-neon.c delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm.S delete mode 100644 vendor/pixman/pixman/pixman-arm-simd-asm.h delete mode 100644 vendor/pixman/pixman/pixman-arm-simd.c delete mode 100644 vendor/pixman/pixman/pixman-arm.c delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm.S delete mode 100644 vendor/pixman/pixman/pixman-arma64-neon-asm.h delete mode 100644 vendor/pixman/pixman/pixman-bits-image.c delete mode 100644 vendor/pixman/pixman/pixman-combine-float.c delete mode 100644 vendor/pixman/pixman/pixman-combine32.c delete mode 100644 vendor/pixman/pixman/pixman-combine32.h delete mode 100644 vendor/pixman/pixman/pixman-compiler.h delete mode 100644 vendor/pixman/pixman/pixman-conical-gradient.c delete mode 100644 vendor/pixman/pixman/pixman-edge-accessors.c delete mode 100644 vendor/pixman/pixman/pixman-edge-imp.h delete mode 100644 vendor/pixman/pixman/pixman-edge.c delete mode 100644 vendor/pixman/pixman/pixman-fast-path.c delete mode 100644 vendor/pixman/pixman/pixman-filter.c delete mode 100644 vendor/pixman/pixman/pixman-general.c delete mode 100644 vendor/pixman/pixman/pixman-glyph.c delete mode 100644 vendor/pixman/pixman/pixman-gradient-walker.c delete mode 100644 vendor/pixman/pixman/pixman-image.c delete mode 100644 vendor/pixman/pixman/pixman-implementation.c delete mode 100644 vendor/pixman/pixman/pixman-inlines.h delete mode 100644 vendor/pixman/pixman/pixman-linear-gradient.c delete mode 100644 vendor/pixman/pixman/pixman-matrix.c delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2-asm.S delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2-asm.h delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2.c delete mode 100644 vendor/pixman/pixman/pixman-mips-dspr2.h delete mode 100644 vendor/pixman/pixman/pixman-mips-memcpy-asm.S delete mode 100644 vendor/pixman/pixman/pixman-mips.c delete mode 100644 vendor/pixman/pixman/pixman-mmx.c delete mode 100644 vendor/pixman/pixman/pixman-noop.c delete mode 100644 vendor/pixman/pixman/pixman-ppc.c delete mode 100644 vendor/pixman/pixman/pixman-private.h delete mode 100644 vendor/pixman/pixman/pixman-radial-gradient.c delete mode 100644 vendor/pixman/pixman/pixman-region.c delete mode 100644 vendor/pixman/pixman/pixman-region16.c delete mode 100644 vendor/pixman/pixman/pixman-region32.c delete mode 100644 vendor/pixman/pixman/pixman-solid-fill.c delete mode 100644 vendor/pixman/pixman/pixman-sse2.c delete mode 100644 vendor/pixman/pixman/pixman-ssse3.c delete mode 100644 vendor/pixman/pixman/pixman-timer.c delete mode 100644 vendor/pixman/pixman/pixman-trap.c delete mode 100644 vendor/pixman/pixman/pixman-utils.c delete mode 100644 vendor/pixman/pixman/pixman-version.h.in delete mode 100644 vendor/pixman/pixman/pixman-vmx.c delete mode 100644 vendor/pixman/pixman/pixman-x86.c delete mode 100644 vendor/pixman/pixman/pixman.c delete mode 100644 vendor/pixman/pixman/pixman.h delete mode 100644 vendor/pixman/pixman/rounding.txt delete mode 100644 vendor/pixman/pixman/solaris-hwcap.mapfile diff --git a/build.zig b/build.zig index 0c9cc9441..25b5e5453 100644 --- a/build.zig +++ b/build.zig @@ -1007,10 +1007,6 @@ fn addDeps( .optimize = optimize, }); const opengl_dep = b.dependency("opengl", .{}); - const pixman_dep = b.dependency("pixman", .{ - .target = target, - .optimize = optimize, - }); const sentry_dep = b.dependency("sentry", .{ .target = target, .optimize = optimize, @@ -1044,6 +1040,7 @@ fn addDeps( .target = target, .optimize = optimize, }); + const z2d_dep = b.dependency("z2d", .{}); // Wasm we do manually since it is such a different build. if (step.rootModuleTarget().cpu.arch == .wasm32) { @@ -1125,12 +1122,16 @@ fn addDeps( step.root_module.addImport("spirv_cross", spirv_cross_dep.module("spirv_cross")); step.root_module.addImport("xev", libxev_dep.module("xev")); step.root_module.addImport("opengl", opengl_dep.module("opengl")); - step.root_module.addImport("pixman", pixman_dep.module("pixman")); step.root_module.addImport("sentry", sentry_dep.module("sentry")); step.root_module.addImport("ziglyph", ziglyph_dep.module("ziglyph")); step.root_module.addImport("vaxis", vaxis_dep.module("vaxis")); step.root_module.addImport("wuffs", wuffs_dep.module("wuffs")); step.root_module.addImport("zf", zf_dep.module("zf")); + step.root_module.addImport("z2d", b.addModule("z2d", .{ + .root_source_file = z2d_dep.path("src/z2d.zig"), + .target = target, + .optimize = optimize, + })); // Mac Stuff if (step.rootModuleTarget().isDarwin()) { @@ -1196,7 +1197,6 @@ fn addDeps( step.linkSystemLibrary2("freetype2", dynamic_link_opts); step.linkSystemLibrary2("libpng", dynamic_link_opts); step.linkSystemLibrary2("oniguruma", dynamic_link_opts); - step.linkSystemLibrary2("pixman-1", dynamic_link_opts); step.linkSystemLibrary2("zlib", dynamic_link_opts); if (config.font_backend.hasFontconfig()) { @@ -1222,10 +1222,6 @@ fn addDeps( step.linkLibrary(freetype_dep.artifact("freetype")); try static_libs.append(freetype_dep.artifact("freetype").getEmittedBin()); - // Pixman - step.linkLibrary(pixman_dep.artifact("pixman")); - try static_libs.append(pixman_dep.artifact("pixman").getEmittedBin()); - // Harfbuzz if (config.font_backend.hasHarfbuzz()) { step.linkLibrary(harfbuzz_dep.artifact("harfbuzz")); diff --git a/build.zig.zon b/build.zig.zon index 3722bacc2..b0c409778 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -36,7 +36,6 @@ .macos = .{ .path = "./pkg/macos" }, .oniguruma = .{ .path = "./pkg/oniguruma" }, .opengl = .{ .path = "./pkg/opengl" }, - .pixman = .{ .path = "./pkg/pixman" }, .sentry = .{ .path = "./pkg/sentry" }, .simdutf = .{ .path = "./pkg/simdutf" }, .utfcpp = .{ .path = "./pkg/utfcpp" }, @@ -61,5 +60,9 @@ .url = "git+https://github.com/natecraddock/zf.git?ref=main#bb27a917c3513785c6a91f0b1c10002a5029cacc", .hash = "1220a74107c7f153a2f809e41c7fa7e8dbf75c91043e39fad998247804e5edac2cc8", }, + .z2d = .{ + .url = "git+https://github.com/vancluever/z2d?ref=main#285a796eb9c25a2389f087d008f0e60faf0b8eda", + .hash = "12206445aa45bcf0170ace371905f705aec1d8d4f61e7dd77839c6621b8c407680a5", + }, }, } diff --git a/nix/devShell.nix b/nix/devShell.nix index ed83407fb..a924d853e 100644 --- a/nix/devShell.nix +++ b/nix/devShell.nix @@ -41,7 +41,6 @@ libXi, libXinerama, libXrandr, - pixman, zlib, alejandra, pandoc, @@ -61,7 +60,6 @@ harfbuzz libpng oniguruma - pixman zlib libX11 @@ -126,7 +124,6 @@ in harfbuzz libpng oniguruma - pixman zlib libX11 diff --git a/nix/package.nix b/nix/package.nix index af7587900..668663dc7 100644 --- a/nix/package.nix +++ b/nix/package.nix @@ -7,7 +7,6 @@ freetype, harfbuzz, libpng, - pixman, zlib, libGL, libX11, @@ -133,7 +132,6 @@ in freetype harfbuzz libpng - pixman zlib libX11 diff --git a/pkg/pixman/build.zig b/pkg/pixman/build.zig deleted file mode 100644 index b1338ed25..000000000 --- a/pkg/pixman/build.zig +++ /dev/null @@ -1,122 +0,0 @@ -const std = @import("std"); - -pub fn build(b: *std.Build) !void { - const target = b.standardTargetOptions(.{}); - const optimize = b.standardOptimizeOption(.{}); - - const module = b.addModule("pixman", .{ .root_source_file = b.path("main.zig") }); - - const upstream = b.dependency("pixman", .{}); - const lib = b.addStaticLibrary(.{ - .name = "pixman", - .target = target, - .optimize = optimize, - }); - lib.linkLibC(); - if (target.result.os.tag != .windows) { - lib.linkSystemLibrary("pthread"); - } - if (target.result.isDarwin()) { - const apple_sdk = @import("apple_sdk"); - try apple_sdk.addPaths(b, &lib.root_module); - } - - lib.addIncludePath(upstream.path("")); - lib.addIncludePath(b.path("")); - module.addIncludePath(upstream.path("pixman")); - module.addIncludePath(b.path("")); - - var flags = std.ArrayList([]const u8).init(b.allocator); - defer flags.deinit(); - try flags.appendSlice(&.{ - "-DHAVE_SIGACTION=1", - "-DHAVE_ALARM=1", - "-DHAVE_MPROTECT=1", - "-DHAVE_GETPAGESIZE=1", - "-DHAVE_MMAP=1", - "-DHAVE_GETISAX=1", - "-DHAVE_GETTIMEOFDAY=1", - - "-DHAVE_FENV_H=1", - "-DHAVE_SYS_MMAN_H=1", - "-DHAVE_UNISTD_H=1", - - "-DSIZEOF_LONG=8", - "-DPACKAGE=foo", - - // There is ubsan - "-fno-sanitize=undefined", - "-fno-sanitize-trap=undefined", - }); - if (!(target.result.os.tag == .windows)) { - try flags.appendSlice(&.{ - "-DHAVE_PTHREADS=1", - - "-DHAVE_POSIX_MEMALIGN=1", - }); - } - - lib.addCSourceFiles(.{ - .root = upstream.path(""), - .files = srcs, - .flags = flags.items, - }); - - lib.installHeader(b.path("pixman-version.h"), "pixman-version.h"); - lib.installHeadersDirectory( - upstream.path("pixman"), - "", - .{ .include_extensions = &.{".h"} }, - ); - - b.installArtifact(lib); - - if (target.query.isNative()) { - const test_exe = b.addTest(.{ - .name = "test", - .root_source_file = b.path("main.zig"), - .target = target, - .optimize = optimize, - }); - test_exe.linkLibrary(lib); - var it = module.import_table.iterator(); - while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*); - - const tests_run = b.addRunArtifact(test_exe); - const test_step = b.step("test", "Run tests"); - test_step.dependOn(&tests_run.step); - } -} - -const srcs: []const []const u8 = &.{ - "pixman/pixman.c", - "pixman/pixman-access.c", - "pixman/pixman-access-accessors.c", - "pixman/pixman-bits-image.c", - "pixman/pixman-combine32.c", - "pixman/pixman-combine-float.c", - "pixman/pixman-conical-gradient.c", - "pixman/pixman-filter.c", - "pixman/pixman-x86.c", - "pixman/pixman-mips.c", - "pixman/pixman-arm.c", - "pixman/pixman-ppc.c", - "pixman/pixman-edge.c", - "pixman/pixman-edge-accessors.c", - "pixman/pixman-fast-path.c", - "pixman/pixman-glyph.c", - "pixman/pixman-general.c", - "pixman/pixman-gradient-walker.c", - "pixman/pixman-image.c", - "pixman/pixman-implementation.c", - "pixman/pixman-linear-gradient.c", - "pixman/pixman-matrix.c", - "pixman/pixman-noop.c", - "pixman/pixman-radial-gradient.c", - "pixman/pixman-region16.c", - "pixman/pixman-region32.c", - "pixman/pixman-solid-fill.c", - //"pixman/pixman-timer.c", - "pixman/pixman-trap.c", - "pixman/pixman-utils.c", -}; diff --git a/pkg/pixman/build.zig.zon b/pkg/pixman/build.zig.zon deleted file mode 100644 index af6813e07..000000000 --- a/pkg/pixman/build.zig.zon +++ /dev/null @@ -1,13 +0,0 @@ -.{ - .name = "pixman", - .version = "0.42.2", - .paths = .{""}, - .dependencies = .{ - .pixman = .{ - .url = "https://deps.files.ghostty.dev/pixman-pixman-0.42.2.tar.gz", - .hash = "12209b9206f9a5d31ccd9a2312cc72cb9dfc3e034aee1883c549dc1d753fae457230", - }, - - .apple_sdk = .{ .path = "../apple-sdk" }, - }, -} diff --git a/pkg/pixman/c.zig b/pkg/pixman/c.zig deleted file mode 100644 index 912dd7fbc..000000000 --- a/pkg/pixman/c.zig +++ /dev/null @@ -1,3 +0,0 @@ -pub const c = @cImport({ - @cInclude("pixman.h"); -}); diff --git a/pkg/pixman/error.zig b/pkg/pixman/error.zig deleted file mode 100644 index 2fa569aed..000000000 --- a/pkg/pixman/error.zig +++ /dev/null @@ -1,4 +0,0 @@ -pub const Error = error{ - // Pixman doesn't really have errors so we just have a single error. - PixmanFailure, -}; diff --git a/pkg/pixman/format.zig b/pkg/pixman/format.zig deleted file mode 100644 index 1176d5212..000000000 --- a/pkg/pixman/format.zig +++ /dev/null @@ -1,118 +0,0 @@ -const std = @import("std"); -const c = @import("c.zig").c; -const pixman = @import("main.zig"); - -pub const FormatCode = enum(c_uint) { - // 128bpp formats - rgba_float = c.PIXMAN_FORMAT_BYTE(128, c.PIXMAN_TYPE_RGBA_FLOAT, 32, 32, 32, 32), - - // 96bpp formats - rgb_float = c.PIXMAN_FORMAT_BYTE(96, c.PIXMAN_TYPE_RGBA_FLOAT, 0, 32, 32, 32), - - // 32bpp formats - a8r8g8b8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 8, 8, 8, 8), - x8r8g8b8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 8, 8, 8), - a8b8g8r8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 8, 8, 8, 8), - x8b8g8r8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 0, 8, 8, 8), - b8g8r8a8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_BGRA, 8, 8, 8, 8), - b8g8r8x8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_BGRA, 0, 8, 8, 8), - r8g8b8a8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_RGBA, 8, 8, 8, 8), - r8g8b8x8 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_RGBA, 0, 8, 8, 8), - x14r6g6b6 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 6, 6, 6), - x2r10g10b10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 0, 10, 10, 10), - a2r10g10b10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB, 2, 10, 10, 10), - x2b10g10r10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 0, 10, 10, 10), - a2b10g10r10 = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ABGR, 2, 10, 10, 10), - - // sRGB formats - a8r8g8b8_sRGB = c.PIXMAN_FORMAT(32, c.PIXMAN_TYPE_ARGB_SRGB, 8, 8, 8, 8), - r8g8b8_sRGB = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ARGB_SRGB, 0, 8, 8, 8), - - // 24bpp formats - r8g8b8 = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ARGB, 0, 8, 8, 8), - b8g8r8 = c.PIXMAN_FORMAT(24, c.PIXMAN_TYPE_ABGR, 0, 8, 8, 8), - - // 16bpp formats - r5g6b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 5, 6, 5), - b5g6r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 5, 6, 5), - - a1r5g5b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 1, 5, 5, 5), - x1r5g5b5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 5, 5, 5), - a1b5g5r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 1, 5, 5, 5), - x1b5g5r5 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 5, 5, 5), - a4r4g4b4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 4, 4, 4, 4), - x4r4g4b4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ARGB, 0, 4, 4, 4), - a4b4g4r4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 4, 4, 4, 4), - x4b4g4r4 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_ABGR, 0, 4, 4, 4), - - // 8bpp formats - a8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_A, 8, 0, 0, 0), - r3g3b2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ARGB, 0, 3, 3, 2), - b2g3r3 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ABGR, 0, 3, 3, 2), - a2r2g2b2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ARGB, 2, 2, 2, 2), - a2b2g2r2 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_ABGR, 2, 2, 2, 2), - - c8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0), - g8 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0), - - x4a4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_A, 4, 0, 0, 0), - - // c8/g8 equivalent - // x4c4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0), - // x4g4 = c.PIXMAN_FORMAT(8, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0), - - // 4bpp formats - a4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_A, 4, 0, 0, 0), - r1g2b1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ARGB, 0, 1, 2, 1), - b1g2r1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ABGR, 0, 1, 2, 1), - a1r1g1b1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ARGB, 1, 1, 1, 1), - a1b1g1r1 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_ABGR, 1, 1, 1, 1), - - c4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_COLOR, 0, 0, 0, 0), - g4 = c.PIXMAN_FORMAT(4, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0), - - // 1bpp formats - a1 = c.PIXMAN_FORMAT(1, c.PIXMAN_TYPE_A, 1, 0, 0, 0), - - g1 = c.PIXMAN_FORMAT(1, c.PIXMAN_TYPE_GRAY, 0, 0, 0, 0), - - // YUV formats - yuy2 = c.PIXMAN_FORMAT(16, c.PIXMAN_TYPE_YUY2, 0, 0, 0, 0), - yv12 = c.PIXMAN_FORMAT(12, c.PIXMAN_TYPE_YV12, 0, 0, 0, 0), - - pub inline fn bpp(self: FormatCode) u32 { - return self.reshift(24, 8); - } - - /// Calculates a valid stride for the bpp and width. Based on Cairo. - pub fn strideForWidth(self: FormatCode, width: u32) c_int { - const alignment = @sizeOf(u32); - const val = @as(c_int, @intCast((self.bpp() * width + 7) / 8)); - return val + (alignment - 1) & -alignment; - } - - // Converted from pixman.h - fn reshift(self: FormatCode, ofs: u5, num: u5) u32 { - const val = @intFromEnum(self); - const v1 = val >> ofs; - const v2 = @as(c_uint, 1) << num; - const v3 = @as(u5, @intCast((val >> 22) & 3)); - return ((v1 & (v2 - 1)) << v3); - } -}; - -test "bpp" { - const testing = std.testing; - - try testing.expectEqual(@as(u32, 1), FormatCode.g1.bpp()); - try testing.expectEqual(@as(u32, 4), FormatCode.g4.bpp()); - try testing.expectEqual(@as(u32, 8), FormatCode.g8.bpp()); -} - -test "stride" { - const testing = std.testing; - - try testing.expectEqual(@as(c_int, 4), FormatCode.g1.strideForWidth(10)); - try testing.expectEqual(@as(c_int, 8), FormatCode.g4.strideForWidth(10)); - try testing.expectEqual(@as(c_int, 12), FormatCode.g8.strideForWidth(10)); -} diff --git a/pkg/pixman/image.zig b/pkg/pixman/image.zig deleted file mode 100644 index 76cb45922..000000000 --- a/pkg/pixman/image.zig +++ /dev/null @@ -1,211 +0,0 @@ -const std = @import("std"); -const c = @import("c.zig").c; -const pixman = @import("main.zig"); - -pub const Image = opaque { - pub fn createBitsNoClear( - format: pixman.FormatCode, - width: c_int, - height: c_int, - bits: [*]u32, - stride: c_int, - ) pixman.Error!*Image { - return @as(?*Image, @ptrCast(c.pixman_image_create_bits_no_clear( - @intFromEnum(format), - width, - height, - bits, - stride, - ))) orelse return pixman.Error.PixmanFailure; - } - - pub fn createSolidFill( - color: pixman.Color, - ) pixman.Error!*Image { - return @as(?*Image, @ptrCast(c.pixman_image_create_solid_fill( - @ptrCast(&color), - ))) orelse return pixman.Error.PixmanFailure; - } - - pub fn unref(self: *Image) bool { - return c.pixman_image_unref(@ptrCast(self)) == 1; - } - - /// A variant of getDataUnsafe that sets the length of the slice to - /// height * stride. Its possible the buffer is larger but this is the - /// known safe values. If you KNOW the buffer is larger you can use the - /// unsafe variant. - pub fn getData(self: *Image) []u32 { - const height = self.getHeight(); - const stride = self.getStride(); - const ptr = self.getDataUnsafe(); - const len = @as(usize, @intCast(height * stride)); - return ptr[0..len]; - } - - pub fn getDataUnsafe(self: *Image) [*]u32 { - return c.pixman_image_get_data(@ptrCast(self)); - } - - pub fn getHeight(self: *Image) c_int { - return c.pixman_image_get_height(@ptrCast(self)); - } - - pub fn getWidth(self: *Image) c_int { - return c.pixman_image_get_width(@ptrCast(self)); - } - - pub fn getStride(self: *Image) c_int { - return c.pixman_image_get_stride(@ptrCast(self)); - } - - pub fn fillBoxes( - self: *Image, - op: pixman.Op, - color: pixman.Color, - boxes: []const pixman.Box32, - ) pixman.Error!void { - if (c.pixman_image_fill_boxes( - @intFromEnum(op), - @ptrCast(self), - @ptrCast(&color), - @intCast(boxes.len), - @ptrCast(boxes.ptr), - ) == 0) return pixman.Error.PixmanFailure; - } - - pub fn fillRectangles( - self: *Image, - op: pixman.Op, - color: pixman.Color, - rects: []const pixman.Rectangle16, - ) pixman.Error!void { - if (c.pixman_image_fill_rectangles( - @intFromEnum(op), - @ptrCast(self), - @ptrCast(&color), - @intCast(rects.len), - @ptrCast(rects.ptr), - ) == 0) return pixman.Error.PixmanFailure; - } - - pub fn rasterizeTrapezoid( - self: *Image, - trap: pixman.Trapezoid, - x_off: c_int, - y_off: c_int, - ) void { - c.pixman_rasterize_trapezoid( - @ptrCast(self), - @ptrCast(&trap), - x_off, - y_off, - ); - } - - pub fn composite( - self: *Image, - op: pixman.Op, - src: *Image, - mask: ?*Image, - src_x: i16, - src_y: i16, - mask_x: i16, - mask_y: i16, - dest_x: i16, - dest_y: i16, - width: u16, - height: u16, - ) void { - c.pixman_image_composite( - @intFromEnum(op), - @ptrCast(src), - @ptrCast(mask), - @ptrCast(self), - src_x, - src_y, - mask_x, - mask_y, - dest_x, - dest_y, - width, - height, - ); - } - - pub fn compositeTriangles( - self: *Image, - op: pixman.Op, - src: *Image, - mask_format: pixman.FormatCode, - x_src: c_int, - y_src: c_int, - x_dst: c_int, - y_dst: c_int, - tris: []const pixman.Triangle, - ) void { - c.pixman_composite_triangles( - @intFromEnum(op), - @ptrCast(src), - @ptrCast(self), - @intFromEnum(mask_format), - x_src, - y_src, - x_dst, - y_dst, - @intCast(tris.len), - @ptrCast(tris.ptr), - ); - } -}; - -test "create and destroy" { - const testing = std.testing; - const alloc = testing.allocator; - - const width = 10; - const height = 10; - const format: pixman.FormatCode = .g1; - const stride = format.strideForWidth(width); - - const len = height * @as(usize, @intCast(stride)); - const data = try alloc.alloc(u32, len); - defer alloc.free(data); - @memset(data, 0); - const img = try Image.createBitsNoClear(.g1, width, height, data.ptr, stride); - try testing.expectEqual(@as(c_int, height), img.getHeight()); - try testing.expectEqual(@as(c_int, stride), img.getStride()); - try testing.expect(img.getData().len == height * stride); - try testing.expect(img.unref()); -} - -test "fill boxes a1" { - const testing = std.testing; - const alloc = testing.allocator; - - // Dimensions - const width = 100; - const height = 100; - const format: pixman.FormatCode = .a1; - const stride = format.strideForWidth(width); - - // Image - const len = height * @as(usize, @intCast(stride)); - const data = try alloc.alloc(u32, len); - defer alloc.free(data); - @memset(data, 0); - const img = try Image.createBitsNoClear(format, width, height, data.ptr, stride); - defer _ = img.unref(); - - // Fill - const color: pixman.Color = .{ .red = 0xFFFF, .green = 0xFFFF, .blue = 0xFFFF, .alpha = 0xFFFF }; - const boxes = &[_]pixman.Box32{ - .{ - .x1 = 0, - .y1 = 0, - .x2 = width, - .y2 = height, - }, - }; - try img.fillBoxes(.src, color, boxes); -} diff --git a/pkg/pixman/main.zig b/pkg/pixman/main.zig deleted file mode 100644 index 33ea19127..000000000 --- a/pkg/pixman/main.zig +++ /dev/null @@ -1,23 +0,0 @@ -const std = @import("std"); -const format = @import("format.zig"); -const image = @import("image.zig"); -const types = @import("types.zig"); - -pub const c = @import("c.zig").c; -pub const Color = types.Color; -pub const Error = @import("error.zig").Error; -pub const Fixed = types.Fixed; -pub const FormatCode = format.FormatCode; -pub const Image = image.Image; -pub const Op = types.Op; -pub const PointFixed = types.PointFixed; -pub const LineFixed = types.LineFixed; -pub const Triangle = types.Triangle; -pub const Trapezoid = types.Trapezoid; -pub const Rectangle16 = types.Rectangle16; -pub const Box32 = types.Box32; -pub const Indexed = types.Indexed; - -test { - std.testing.refAllDecls(@This()); -} diff --git a/pkg/pixman/pixman-version.h b/pkg/pixman/pixman-version.h deleted file mode 100644 index c2342d3d5..000000000 --- a/pkg/pixman/pixman-version.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright © 2008 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Author: Carl D. Worth - */ - -#ifndef PIXMAN_VERSION_H__ -#define PIXMAN_VERSION_H__ - -#ifndef PIXMAN_H__ -# error pixman-version.h should only be included by pixman.h -#endif - -#define PIXMAN_VERSION_MAJOR 999 -#define PIXMAN_VERSION_MINOR 999 -#define PIXMAN_VERSION_MICRO 999 - -#define PIXMAN_VERSION_STRING "999.999.999" - -#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ - ((major) * 10000) \ - + ((minor) * 100) \ - + ((micro) * 1)) - -#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ - PIXMAN_VERSION_MAJOR, \ - PIXMAN_VERSION_MINOR, \ - PIXMAN_VERSION_MICRO) - -#ifndef PIXMAN_API -# define PIXMAN_API -#endif - -#endif /* PIXMAN_VERSION_H__ */ diff --git a/pkg/pixman/types.zig b/pkg/pixman/types.zig deleted file mode 100644 index 3547bd1d5..000000000 --- a/pkg/pixman/types.zig +++ /dev/null @@ -1,131 +0,0 @@ -const std = @import("std"); -const c = @import("c.zig").c; -const pixman = @import("main.zig"); - -pub const Op = enum(c_uint) { - clear = 0x00, - src = 0x01, - dst = 0x02, - over = 0x03, - over_reverse = 0x04, - in = 0x05, - in_reverse = 0x06, - out = 0x07, - out_reverse = 0x08, - atop = 0x09, - atop_reverse = 0x0a, - xor = 0x0b, - add = 0x0c, - saturate = 0x0d, - - disjoint_clear = 0x10, - disjoint_src = 0x11, - disjoint_dst = 0x12, - disjoint_over = 0x13, - disjoint_over_reverse = 0x14, - disjoint_in = 0x15, - disjoint_in_reverse = 0x16, - disjoint_out = 0x17, - disjoint_out_reverse = 0x18, - disjoint_atop = 0x19, - disjoint_atop_reverse = 0x1a, - disjoint_xor = 0x1b, - - conjoint_clear = 0x20, - conjoint_src = 0x21, - conjoint_dst = 0x22, - conjoint_over = 0x23, - conjoint_over_reverse = 0x24, - conjoint_in = 0x25, - conjoint_in_reverse = 0x26, - conjoint_out = 0x27, - conjoint_out_reverse = 0x28, - conjoint_atop = 0x29, - conjoint_atop_reverse = 0x2a, - conjoint_xor = 0x2b, - - multiply = 0x30, - screen = 0x31, - overlay = 0x32, - darken = 0x33, - lighten = 0x34, - color_dodge = 0x35, - color_burn = 0x36, - hard_light = 0x37, - soft_light = 0x38, - difference = 0x39, - exclusion = 0x3a, - hsl_hue = 0x3b, - hsl_saturation = 0x3c, - hsl_color = 0x3d, - hsl_luminosity = 0x3e, -}; - -pub const Color = extern struct { - red: u16, - green: u16, - blue: u16, - alpha: u16, -}; - -pub const Fixed = enum(i32) { - _, - - pub fn init(v: anytype) Fixed { - return switch (@TypeOf(v)) { - comptime_int, i32, u32 => @enumFromInt(v << 16), - f64 => @enumFromInt(@as(i32, @intFromFloat(v * 65536))), - else => { - @compileLog(@TypeOf(v)); - @compileError("unsupported type"); - }, - }; - } -}; - -pub const PointFixed = extern struct { - x: Fixed, - y: Fixed, -}; - -pub const LineFixed = extern struct { - p1: PointFixed, - p2: PointFixed, -}; - -pub const Triangle = extern struct { - p1: PointFixed, - p2: PointFixed, - p3: PointFixed, -}; - -pub const Trapezoid = extern struct { - top: Fixed, - bottom: Fixed, - left: LineFixed, - right: LineFixed, -}; - -pub const Rectangle16 = extern struct { - x: i16, - y: i16, - width: u16, - height: u16, -}; - -pub const Box32 = extern struct { - x1: i32, - y1: i32, - x2: i32, - y2: i32, -}; - -pub const Indexed = extern struct { - color: bool, - rgba: [256]u32, - ent: [32768]u8, -}; - -test { - std.testing.refAllDecls(@This()); -} diff --git a/src/font/sprite.zig b/src/font/sprite.zig index 00462c205..d71e777bf 100644 --- a/src/font/sprite.zig +++ b/src/font/sprite.zig @@ -3,6 +3,7 @@ const canvas = @import("sprite/canvas.zig"); pub const Face = @import("sprite/Face.zig"); pub const Box = canvas.Box; +pub const Point = canvas.Point; pub const Canvas = canvas.Canvas; pub const Color = canvas.Color; diff --git a/src/font/sprite/Box.zig b/src/font/sprite/Box.zig index 109ff2353..4343f0be1 100644 --- a/src/font/sprite/Box.zig +++ b/src/font/sprite/Box.zig @@ -23,6 +23,8 @@ const Allocator = std.mem.Allocator; const font = @import("../main.zig"); const Sprite = @import("../sprite.zig").Sprite; +const z2d = @import("z2d"); + const log = std.log.scoped(.box_font); /// The cell width and height because the boxes are fit perfectly @@ -101,19 +103,54 @@ const Alignment = struct { const lower_left: Alignment = .{ .vertical = .bottom, .horizontal = .left }; const lower_right: Alignment = .{ .vertical = .bottom, .horizontal = .right }; + const center: Alignment = .{}; + + const upper_center = upper; + const lower_center = lower; + const middle_left = left; + const middle_right = right; + const middle_center: Alignment = center; + const top = upper; const bottom = lower; + const center_top = top; + const center_bottom = bottom; + + const top_left = upper_left; + const top_right = upper_right; + const bottom_left = lower_left; + const bottom_right = lower_right; +}; + +const Corner = enum { + tl, + tr, + bl, + br, }; // Utility names for common fractions const one_eighth: f64 = 0.125; const one_quarter: f64 = 0.25; +const one_third: f64 = (1.0 / 3.0); const three_eighths: f64 = 0.375; const half: f64 = 0.5; const five_eighths: f64 = 0.625; +const two_thirds: f64 = (2.0 / 3.0); const three_quarters: f64 = 0.75; const seven_eighths: f64 = 0.875; +/// Shades +const Shade = enum(u8) { + off = 0x00, + light = 0x40, + medium = 0x80, + dark = 0xc0, + on = 0xff, + + _, +}; + pub fn renderGlyph( self: Box, alloc: Allocator, @@ -160,6 +197,7 @@ pub fn unadjustedCodepoint(cp: u32) bool { } fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void { + _ = alloc; switch (cp) { // '─' 0x2500 => self.draw_lines(canvas, .{ .left = .light, .right = .light }), @@ -385,8 +423,15 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void 0x256b => self.draw_lines(canvas, .{ .up = .double, .down = .double, .left = .light, .right = .light }), // '╬' 0x256c => self.draw_lines(canvas, .{ .up = .double, .down = .double, .left = .double, .right = .double }), - 0x256d...0x2570 => try self.draw_light_arc(alloc, canvas, cp), + // '╭' + 0x256d => try self.draw_light_arc(canvas, .br), + // '╮' + 0x256e => try self.draw_light_arc(canvas, .bl), + // '╯' + 0x256f => try self.draw_light_arc(canvas, .tl), + // '╰' + 0x2570 => try self.draw_light_arc(canvas, .tr), // '╱' 0x2571 => self.draw_light_diagonal_upper_right_to_lower_left(canvas), // '╲' @@ -500,7 +545,7 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void 0x1fb52...0x1fb56, 0x1fb5d...0x1fb61, 0x1fb68...0x1fb6b, - => try self.draw_wedge_triangle_inverted(alloc, canvas, cp), + => try self.draw_wedge_triangle_inverted(canvas, cp), // '🭆' 0x1fb46, @@ -599,6 +644,294 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void 0x1fb8a => self.draw_block(canvas, Alignment.right, three_quarters, 1), // '🮋' RIGHT SEVEN EIGHTHS BLOCK 0x1fb8b => self.draw_block(canvas, Alignment.right, seven_eighths, 1), + // '🮌' + 0x1fb8c => self.draw_block_shade(canvas, Alignment.left, half, 1, .medium), + // '🮍' + 0x1fb8d => self.draw_block_shade(canvas, Alignment.right, half, 1, .medium), + // '🮎' + 0x1fb8e => self.draw_block_shade(canvas, Alignment.upper, 1, half, .medium), + // '🮏' + 0x1fb8f => self.draw_block_shade(canvas, Alignment.lower, 1, half, .medium), + + // '🮐' + 0x1fb90 => self.draw_medium_shade(canvas), + // '🮑' + 0x1fb91 => { + self.draw_medium_shade(canvas); + self.draw_block(canvas, Alignment.upper, 1, half); + }, + // '🮒' + 0x1fb92 => { + self.draw_medium_shade(canvas); + self.draw_block(canvas, Alignment.lower, 1, half); + }, + // '🮔' + 0x1fb94 => { + self.draw_medium_shade(canvas); + self.draw_block(canvas, Alignment.right, half, 1); + }, + // '🮕' + 0x1fb95 => self.draw_checkerboard_fill(canvas, 0), + // '🮖' + 0x1fb96 => self.draw_checkerboard_fill(canvas, 1), + // '🮗' + 0x1fb97 => { + self.draw_horizontal_one_eighth_block_n(canvas, 2); + self.draw_horizontal_one_eighth_block_n(canvas, 3); + self.draw_horizontal_one_eighth_block_n(canvas, 6); + self.draw_horizontal_one_eighth_block_n(canvas, 7); + }, + // '🮘' + 0x1fb98 => self.draw_upper_left_to_lower_right_fill(canvas), + // '🮙' + 0x1fb99 => self.draw_upper_right_to_lower_left_fill(canvas), + // '🮜' + 0x1fb9c => self.draw_corner_triangle_shade(canvas, .tl, .medium), + // '🮝' + 0x1fb9d => self.draw_corner_triangle_shade(canvas, .tr, .medium), + // '🮞' + 0x1fb9e => self.draw_corner_triangle_shade(canvas, .br, .medium), + // '🮟' + 0x1fb9f => self.draw_corner_triangle_shade(canvas, .bl, .medium), + + // '🮠' + 0x1fba0 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true }), + // '🮡' + 0x1fba1 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true }), + // '🮢' + 0x1fba2 => self.draw_corner_diagonal_lines(canvas, .{ .bl = true }), + // '🮣' + 0x1fba3 => self.draw_corner_diagonal_lines(canvas, .{ .br = true }), + // '🮤' + 0x1fba4 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .bl = true }), + // '🮥' + 0x1fba5 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .br = true }), + // '🮦' + 0x1fba6 => self.draw_corner_diagonal_lines(canvas, .{ .bl = true, .br = true }), + // '🮧' + 0x1fba7 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true }), + // '🮨' + 0x1fba8 => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .br = true }), + // '🮩' + 0x1fba9 => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .bl = true }), + // '🮪' + 0x1fbaa => self.draw_corner_diagonal_lines(canvas, .{ .tr = true, .bl = true, .br = true }), + // '🮫' + 0x1fbab => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .bl = true, .br = true }), + // '🮬' + 0x1fbac => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .br = true }), + // '🮭' + 0x1fbad => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true }), + // '🮮' + 0x1fbae => self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true, .br = true }), + // '🮯' + 0x1fbaf => self.draw_lines(canvas, .{ .up = .heavy, .down = .heavy, .left = .light, .right = .light }), + + // '🮽' + 0x1fbbd => { + self.draw_light_diagonal_cross(canvas); + canvas.invert(); + }, + // '🮾' + 0x1fbbe => { + self.draw_corner_diagonal_lines(canvas, .{ .br = true }); + canvas.invert(); + }, + // '🮿' + 0x1fbbf => { + self.draw_corner_diagonal_lines(canvas, .{ .tl = true, .tr = true, .bl = true, .br = true }); + canvas.invert(); + }, + + // '🯎' + 0x1fbce => self.draw_block(canvas, Alignment.left, two_thirds, 1), + // '🯏' + 0x1fbcf => self.draw_block(canvas, Alignment.left, one_third, 1), + // '🯐' + 0x1fbd0 => self.draw_cell_diagonal( + canvas, + Alignment.middle_right, + Alignment.lower_left, + ), + // '🯑' + 0x1fbd1 => self.draw_cell_diagonal( + canvas, + Alignment.upper_right, + Alignment.middle_left, + ), + // '🯒' + 0x1fbd2 => self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.middle_right, + ), + // '🯓' + 0x1fbd3 => self.draw_cell_diagonal( + canvas, + Alignment.middle_left, + Alignment.lower_right, + ), + // '🯔' + 0x1fbd4 => self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.lower_center, + ), + // '🯕' + 0x1fbd5 => self.draw_cell_diagonal( + canvas, + Alignment.upper_center, + Alignment.lower_right, + ), + // '🯖' + 0x1fbd6 => self.draw_cell_diagonal( + canvas, + Alignment.upper_right, + Alignment.lower_center, + ), + // '🯗' + 0x1fbd7 => self.draw_cell_diagonal( + canvas, + Alignment.upper_center, + Alignment.lower_left, + ), + // '🯘' + 0x1fbd8 => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.middle_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_center, + Alignment.upper_right, + ); + }, + // '🯙' + 0x1fbd9 => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_right, + Alignment.middle_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_center, + Alignment.lower_right, + ); + }, + // '🯚' + 0x1fbda => { + self.draw_cell_diagonal( + canvas, + Alignment.lower_left, + Alignment.middle_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_center, + Alignment.lower_right, + ); + }, + // '🯛' + 0x1fbdb => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.middle_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_center, + Alignment.lower_left, + ); + }, + // '🯜' + 0x1fbdc => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.lower_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.lower_center, + Alignment.upper_right, + ); + }, + // '🯝' + 0x1fbdd => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_right, + Alignment.middle_left, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_left, + Alignment.lower_right, + ); + }, + // '🯞' + 0x1fbde => { + self.draw_cell_diagonal( + canvas, + Alignment.lower_left, + Alignment.upper_center, + ); + self.draw_cell_diagonal( + canvas, + Alignment.upper_center, + Alignment.lower_right, + ); + }, + // '🯟' + 0x1fbdf => { + self.draw_cell_diagonal( + canvas, + Alignment.upper_left, + Alignment.middle_right, + ); + self.draw_cell_diagonal( + canvas, + Alignment.middle_right, + Alignment.lower_left, + ); + }, + + // '🯠' + 0x1fbe0 => self.draw_circle(canvas, Alignment.top, false), + // '🯡' + 0x1fbe1 => self.draw_circle(canvas, Alignment.right, false), + // '🯢' + 0x1fbe2 => self.draw_circle(canvas, Alignment.bottom, false), + // '🯣' + 0x1fbe3 => self.draw_circle(canvas, Alignment.left, false), + // '🯤' + 0x1fbe4 => self.draw_block(canvas, Alignment.upper_center, 0.5, 0.5), + // '🯥' + 0x1fbe5 => self.draw_block(canvas, Alignment.lower_center, 0.5, 0.5), + // '🯦' + 0x1fbe6 => self.draw_block(canvas, Alignment.middle_left, 0.5, 0.5), + // '🯧' + 0x1fbe7 => self.draw_block(canvas, Alignment.middle_right, 0.5, 0.5), + // '🯨' + 0x1fbe8 => self.draw_circle(canvas, Alignment.top, true), + // '🯩' + 0x1fbe9 => self.draw_circle(canvas, Alignment.right, true), + // '🯪' + 0x1fbea => self.draw_circle(canvas, Alignment.bottom, true), + // '🯫' + 0x1fbeb => self.draw_circle(canvas, Alignment.left, true), + // '🯬' + 0x1fbec => self.draw_circle(canvas, Alignment.top_right, true), + // '🯭' + 0x1fbed => self.draw_circle(canvas, Alignment.bottom_left, true), + // '🯮' + 0x1fbee => self.draw_circle(canvas, Alignment.bottom_right, true), + // '🯯' + 0x1fbef => self.draw_circle(canvas, Alignment.top_left, true), // Not official box characters but special characters we hide // in the high bits of a unicode codepoint. @@ -610,7 +943,11 @@ fn draw(self: Box, alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32) !void } } -fn draw_lines(self: Box, canvas: *font.sprite.Canvas, comptime lines: Lines) void { +fn draw_lines( + self: Box, + canvas: *font.sprite.Canvas, + comptime lines: Lines, +) void { const light_px = Thickness.light.height(self.thickness); const heavy_px = Thickness.heavy.height(self.thickness); @@ -858,63 +1195,20 @@ fn draw_heavy_double_dash_vertical(self: Box, canvas: *font.sprite.Canvas) void } fn draw_light_diagonal_upper_right_to_lower_left(self: Box, canvas: *font.sprite.Canvas) void { - const thick_px = Thickness.light.height(self.thickness); - canvas.trapezoid(.{ - .top = 0, - .bottom = @as(i32, @intCast(self.height)), - .left = .{ - .p1 = .{ - .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) - @as(f64, @floatFromInt(thick_px)) / 2)), - .y = 0, - }, - - .p2 = .{ - .x = @as(i32, @intFromFloat(0 - @as(f64, @floatFromInt(thick_px)) / 2)), - .y = @as(i32, @intCast(self.height)), - }, - }, - .right = .{ - .p1 = .{ - .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) + @as(f64, @floatFromInt(thick_px)) / 2)), - .y = 0, - }, - - .p2 = .{ - .x = @as(i32, @intFromFloat(0 + @as(f64, @floatFromInt(thick_px)) / 2)), - .y = @as(i32, @intCast(self.height)), - }, - }, - }); + canvas.line(.{ + .p0 = .{ .x = @floatFromInt(self.width), .y = 0 }, + .p1 = .{ .x = 0, .y = @floatFromInt(self.height) }, + }, @floatFromInt(Thickness.light.height(self.thickness)), .on) catch {}; } fn draw_light_diagonal_upper_left_to_lower_right(self: Box, canvas: *font.sprite.Canvas) void { - const thick_px = Thickness.light.height(self.thickness); - canvas.trapezoid(.{ - .top = 0, - .bottom = @as(i32, @intCast(self.height)), - .left = .{ - .p1 = .{ - .x = @as(i32, @intFromFloat(0 - @as(f64, @floatFromInt(thick_px)) / 2)), - .y = 0, - }, - - .p2 = .{ - .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) - @as(f64, @floatFromInt(thick_px)) / 2)), - .y = @as(i32, @intCast(self.height)), - }, + canvas.line(.{ + .p0 = .{ .x = 0, .y = 0 }, + .p1 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height), }, - .right = .{ - .p1 = .{ - .x = @as(i32, @intFromFloat(0 + @as(f64, @floatFromInt(thick_px)) / 2)), - .y = 0, - }, - - .p2 = .{ - .x = @as(i32, @intFromFloat(@as(f64, @floatFromInt(self.width)) + @as(f64, @floatFromInt(thick_px)) / 2)), - .y = @as(i32, @intCast(self.height)), - }, - }, - }); + }, @floatFromInt(Thickness.light.height(self.thickness)), .on) catch {}; } fn draw_light_diagonal_cross(self: Box, canvas: *font.sprite.Canvas) void { @@ -925,9 +1219,20 @@ fn draw_light_diagonal_cross(self: Box, canvas: *font.sprite.Canvas) void { fn draw_block( self: Box, canvas: *font.sprite.Canvas, - alignment: Alignment, - width: f64, - height: f64, + comptime alignment: Alignment, + comptime width: f64, + comptime height: f64, +) void { + self.draw_block_shade(canvas, alignment, width, height, .on); +} + +fn draw_block_shade( + self: Box, + canvas: *font.sprite.Canvas, + comptime alignment: Alignment, + comptime width: f64, + comptime height: f64, + comptime shade: Shade, ) void { const float_width: f64 = @floatFromInt(self.width); const float_height: f64 = @floatFromInt(self.height); @@ -947,11 +1252,31 @@ fn draw_block( }; canvas.rect(.{ - .x = @intCast(x), - .y = @intCast(y), - .width = w, - .height = h, - }, .on); + .x = @floatFromInt(x), + .y = @floatFromInt(y), + .width = @floatFromInt(w), + .height = @floatFromInt(h), + }, @as(font.sprite.Color, @enumFromInt(@intFromEnum(shade)))); +} + +fn draw_corner_triangle_shade( + self: Box, + canvas: *font.sprite.Canvas, + comptime corner: Corner, + comptime shade: Shade, +) void { + const x0, const y0, const x1, const y1, const x2, const y2 = switch (corner) { + .tl => .{ 0, 0, 0, self.height, self.width, 0 }, + .tr => .{ 0, 0, self.width, self.height, self.width, 0 }, + .bl => .{ 0, 0, 0, self.height, self.width, self.height }, + .br => .{ 0, self.height, self.width, self.height, self.width, 0 }, + }; + + canvas.triangle(.{ + .p0 = .{ .x = @floatFromInt(x0), .y = @floatFromInt(y0) }, + .p1 = .{ .x = @floatFromInt(x1), .y = @floatFromInt(y1) }, + .p2 = .{ .x = @floatFromInt(x2), .y = @floatFromInt(y2) }, + }, @as(font.sprite.Color, @enumFromInt(@intFromEnum(shade)))) catch {}; } fn draw_full_block(self: Box, canvas: *font.sprite.Canvas) void { @@ -964,25 +1289,220 @@ fn draw_vertical_one_eighth_block_n(self: Box, canvas: *font.sprite.Canvas, n: u self.rect(canvas, x, 0, x + w, self.height); } -fn draw_pixman_shade(self: Box, canvas: *font.sprite.Canvas, v: u16) void { +fn draw_checkerboard_fill(self: Box, canvas: *font.sprite.Canvas, parity: u1) void { + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + const x_size: usize = 4; + const y_size: usize = @intFromFloat(@round(4 * (float_height / float_width))); + for (0..x_size) |x| { + const x0 = (self.width * x) / x_size; + const x1 = (self.width * (x + 1)) / x_size; + for (0..y_size) |y| { + const y0 = (self.height * y) / y_size; + const y1 = (self.height * (y + 1)) / y_size; + if ((x + y) % 2 == parity) { + canvas.rect(.{ + .x = @floatFromInt(x0), + .y = @floatFromInt(y0), + .width = @floatFromInt(x1 -| x0), + .height = @floatFromInt(y1 -| y0), + }, .on); + } + } + } +} + +fn draw_upper_left_to_lower_right_fill(self: Box, canvas: *font.sprite.Canvas) void { + const thick_px = Thickness.light.height(self.thickness); + const line_count = self.width / (2 * thick_px); + + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + const float_thick: f64 = @floatFromInt(thick_px); + const stride = @round(float_width / @as(f64, @floatFromInt(line_count))); + + for (0..line_count * 2 + 1) |_i| { + const i = @as(i32, @intCast(_i)) - @as(i32, @intCast(line_count)); + const top_x = @as(f64, @floatFromInt(i)) * stride; + const bottom_x = float_width + top_x; + canvas.line(.{ + .p0 = .{ .x = top_x, .y = 0 }, + .p1 = .{ .x = bottom_x, .y = float_height }, + }, float_thick, .on) catch {}; + } +} + +fn draw_upper_right_to_lower_left_fill(self: Box, canvas: *font.sprite.Canvas) void { + const thick_px = Thickness.light.height(self.thickness); + const line_count = self.width / (2 * thick_px); + + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + const float_thick: f64 = @floatFromInt(thick_px); + const stride = @round(float_width / @as(f64, @floatFromInt(line_count))); + + for (0..line_count * 2 + 1) |_i| { + const i = @as(i32, @intCast(_i)) - @as(i32, @intCast(line_count)); + const bottom_x = @as(f64, @floatFromInt(i)) * stride; + const top_x = float_width + bottom_x; + canvas.line(.{ + .p0 = .{ .x = top_x, .y = 0 }, + .p1 = .{ .x = bottom_x, .y = float_height }, + }, float_thick, .on) catch {}; + } +} + +fn draw_corner_diagonal_lines( + self: Box, + canvas: *font.sprite.Canvas, + comptime corners: Quads, +) void { + const thick_px = Thickness.light.height(self.thickness); + + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + const float_thick: f64 = @floatFromInt(thick_px); + const center_x: f64 = @floatFromInt(self.width / 2 + self.width % 2); + const center_y: f64 = @floatFromInt(self.height / 2 + self.height % 2); + + if (corners.tl) canvas.line(.{ + .p0 = .{ .x = center_x, .y = 0 }, + .p1 = .{ .x = 0, .y = center_y }, + }, float_thick, .on) catch {}; + + if (corners.tr) canvas.line(.{ + .p0 = .{ .x = center_x, .y = 0 }, + .p1 = .{ .x = float_width, .y = center_y }, + }, float_thick, .on) catch {}; + + if (corners.bl) canvas.line(.{ + .p0 = .{ .x = center_x, .y = float_height }, + .p1 = .{ .x = 0, .y = center_y }, + }, float_thick, .on) catch {}; + + if (corners.br) canvas.line(.{ + .p0 = .{ .x = center_x, .y = float_height }, + .p1 = .{ .x = float_width, .y = center_y }, + }, float_thick, .on) catch {}; +} + +fn draw_cell_diagonal( + self: Box, + canvas: *font.sprite.Canvas, + comptime from: Alignment, + comptime to: Alignment, +) void { + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + + const x0: f64 = switch (from.horizontal) { + .left => 0, + .right => float_width, + .center => float_width / 2, + }; + const y0: f64 = switch (from.vertical) { + .top => 0, + .bottom => float_height, + .middle => float_height / 2, + }; + const x1: f64 = switch (to.horizontal) { + .left => 0, + .right => float_width, + .center => float_width / 2, + }; + const y1: f64 = switch (to.vertical) { + .top => 0, + .bottom => float_height, + .middle => float_height / 2, + }; + + self.draw_line( + canvas, + .{ .x = x0, .y = y0 }, + .{ .x = x1, .y = y1 }, + .light, + ) catch {}; +} + +fn draw_circle( + self: Box, + canvas: *font.sprite.Canvas, + comptime position: Alignment, + comptime filled: bool, +) void { + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + + const x: f64 = switch (position.horizontal) { + .left => 0, + .right => float_width, + .center => float_width / 2, + }; + const y: f64 = switch (position.vertical) { + .top => 0, + .bottom => float_height, + .middle => float_height / 2, + }; + const r: f64 = 0.5 * @min(float_width, float_height); + + var ctx: z2d.Context = .{ + .surface = canvas.sfc, + .pattern = .{ + .opaque_pattern = .{ + .pixel = .{ .alpha8 = .{ .a = @intFromEnum(Shade.on) } }, + }, + }, + .line_width = @floatFromInt(Thickness.light.height(self.thickness)), + }; + + var path = z2d.Path.init(canvas.alloc); + defer path.deinit(); + + if (filled) { + path.arc(x, y, r, 0, std.math.pi * 2, false, null) catch return; + path.close() catch return; + ctx.fill(canvas.alloc, path) catch return; + } else { + path.arc(x, y, r - ctx.line_width / 2, 0, std.math.pi * 2, false, null) catch return; + path.close() catch return; + ctx.stroke(canvas.alloc, path) catch return; + } +} + +fn draw_line( + self: Box, + canvas: *font.sprite.Canvas, + p0: font.sprite.Point, + p1: font.sprite.Point, + comptime thickness: Thickness, +) !void { + canvas.line( + .{ .p0 = p0, .p1 = p1 }, + @floatFromInt(thickness.height(self.thickness)), + .on, + ) catch {}; +} + +fn draw_shade(self: Box, canvas: *font.sprite.Canvas, v: u16) void { canvas.rect((font.sprite.Box{ - .x1 = 0, - .y1 = 0, - .x2 = @as(i32, @intCast(self.width)), - .y2 = @as(i32, @intCast(self.height)), + .p0 = .{ .x = 0, .y = 0 }, + .p1 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height), + }, }).rect(), @as(font.sprite.Color, @enumFromInt(v))); } fn draw_light_shade(self: Box, canvas: *font.sprite.Canvas) void { - self.draw_pixman_shade(canvas, 0x40); + self.draw_shade(canvas, 0x40); } fn draw_medium_shade(self: Box, canvas: *font.sprite.Canvas) void { - self.draw_pixman_shade(canvas, 0x80); + self.draw_shade(canvas, 0x80); } fn draw_dark_shade(self: Box, canvas: *font.sprite.Canvas) void { - self.draw_pixman_shade(canvas, 0xc0); + self.draw_shade(canvas, 0xc0); } fn draw_horizontal_one_eighth_block_n(self: Box, canvas: *font.sprite.Canvas, n: u32) void { @@ -1495,29 +2015,20 @@ fn draw_wedge_triangle(self: Box, canvas: *font.sprite.Canvas, cp: u32) !void { else => unreachable, } - canvas.triangle(.{ - .p1 = .{ .x = @as(i32, @intCast(p1_x)), .y = @as(i32, @intCast(p1_y)) }, - .p2 = .{ .x = @as(i32, @intCast(p2_x)), .y = @as(i32, @intCast(p2_y)) }, - .p3 = .{ .x = @as(i32, @intCast(p3_x)), .y = @as(i32, @intCast(p3_y)) }, + try canvas.triangle(.{ + .p0 = .{ .x = @floatFromInt(p1_x), .y = @floatFromInt(p1_y) }, + .p1 = .{ .x = @floatFromInt(p2_x), .y = @floatFromInt(p2_y) }, + .p2 = .{ .x = @floatFromInt(p3_x), .y = @floatFromInt(p3_y) }, }, .on); } fn draw_wedge_triangle_inverted( self: Box, - alloc: Allocator, canvas: *font.sprite.Canvas, cp: u32, ) !void { try self.draw_wedge_triangle(canvas, cp); - - var src = try font.sprite.Canvas.init(alloc, self.width, self.height); - src.rect(.{ .x = 0, .y = 0, .width = self.width, .height = self.height }, .on); - defer src.deinit(alloc); - canvas.composite( - .source_out, - &src, - .{ .x = 0, .y = 0, .width = self.width, .height = self.height }, - ); + canvas.invert(); } fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32) !void { @@ -1526,17 +2037,19 @@ fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32) const y_thirds = self.yThirds(); const box: font.sprite.Box = switch (cp) { 0x1fb46, 0x1fb51 => .{ - .x1 = 0, - .y1 = @as(i32, @intCast(y_thirds[1])), - .x2 = @as(i32, @intCast(self.width)), - .y2 = @as(i32, @intCast(self.height)), + .p0 = .{ .x = 0, .y = @floatFromInt(y_thirds[1]) }, + .p1 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height), + }, }, 0x1fb5c, 0x1fb67 => .{ - .x1 = 0, - .y1 = 0, - .x2 = @as(i32, @intCast(self.width)), - .y2 = @as(i32, @intCast(y_thirds[0])), + .p0 = .{ .x = 0, .y = 0 }, + .p1 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(y_thirds[0]), + }, }, else => unreachable, @@ -1547,246 +2060,106 @@ fn draw_wedge_triangle_and_box(self: Box, canvas: *font.sprite.Canvas, cp: u32) fn draw_light_arc( self: Box, - alloc: Allocator, canvas: *font.sprite.Canvas, - cp: u32, + comptime corner: Corner, ) !void { - const supersample = 4; - const height = self.height * supersample; - const width = self.width * supersample; + const thick_px = Thickness.light.height(self.thickness); + const float_width: f64 = @floatFromInt(self.width); + const float_height: f64 = @floatFromInt(self.height); + const float_thick: f64 = @floatFromInt(thick_px); + const center_x: f64 = @floatFromInt(self.width / 2 + self.width % 2); + const center_y: f64 = @floatFromInt(self.height / 2 + self.height % 2); - // Allocate our supersample sized canvas - var ss_data = try alloc.alloc(u8, height * width); - defer alloc.free(ss_data); - @memset(ss_data, 0); + const r = @min(float_width, float_height) / 2; - const height_pixels = self.height; - const width_pixels = self.width; - const thick_pixels = Thickness.light.height(self.thickness); - const thick = thick_pixels * supersample; + // Fraction away from the center to place the middle control points, + const s: f64 = 0.25; - const circle_inner_edge = (@min(width_pixels, height_pixels) -| thick_pixels) / 2; - - // We want to draw the quartercircle by filling small circles (with r = - // thickness/2.) whose centers are on its edge. This means to get the - // radius of the quartercircle, we add the exact half thickness to the - // radius of the inner circle. - var c_r: f64 = @as(f64, @floatFromInt(circle_inner_edge)) + @as(f64, @floatFromInt(thick_pixels)) / 2; - - // We need to draw short lines from the end of the quartercircle to the - // box-edges, store one endpoint (the other is the edge of the - // quartercircle) in these vars. - var vert_to: u32 = 0; - var hor_to: u32 = 0; - - // Coordinates of the circle-center. - var c_x: u32 = 0; - var c_y: u32 = 0; - - // For a given y there are up to two solutions for the circle-equation. - // Set to -1 for the left, and 1 for the right hemisphere. - var circle_hemisphere: i32 = 0; - - // The quarter circle only has to be evaluated for a small range of - // y-values. - var y_min: u32 = 0; - var y_max: u32 = 0; - - switch (cp) { - '╭' => { - // Don't use supersampled coordinates yet, we want to align actual - // pixels. - // - // pixel-coordinates of the lower edge of the right line and the - // right edge of the bottom line. - const right_bottom_edge = (height_pixels + thick_pixels) / 2; - const bottom_right_edge = (width_pixels + thick_pixels) / 2; - - // find coordinates of circle-center. - c_y = right_bottom_edge + circle_inner_edge; - c_x = bottom_right_edge + circle_inner_edge; - - // we want to render the left, not the right hemisphere of the circle. - circle_hemisphere = -1; - - // don't evaluate beyond c_y, the vertical line is drawn there. - y_min = 0; - y_max = c_y; - - // the vertical line should extend to the bottom of the box, the - // horizontal to the right. - vert_to = height_pixels; - hor_to = width_pixels; + var ctx: z2d.Context = .{ + .surface = canvas.sfc, + .pattern = .{ + .opaque_pattern = .{ + .pixel = .{ .alpha8 = .{ .a = @intFromEnum(Shade.on) } }, + }, }, - '╮' => { - const left_bottom_edge = (height_pixels + thick_pixels) / 2; - const bottom_left_edge = (width_pixels -| thick_pixels) / 2; + .line_width = float_thick, + .line_cap_mode = .round, + }; - c_y = left_bottom_edge + circle_inner_edge; - c_x = bottom_left_edge -| circle_inner_edge; + var path = z2d.Path.init(canvas.alloc); + defer path.deinit(); - circle_hemisphere = 1; - - y_min = 0; - y_max = c_y; - - vert_to = height_pixels; - hor_to = 0; - }, - '╰' => { - const right_top_edge = (height_pixels -| thick_pixels) / 2; - const top_right_edge = (width_pixels + thick_pixels) / 2; - - c_y = right_top_edge -| circle_inner_edge; - c_x = top_right_edge + circle_inner_edge; - - circle_hemisphere = -1; - - y_min = c_y; - y_max = height_pixels; - - vert_to = 0; - hor_to = width_pixels; - }, - '╯' => { - const left_top_edge = (height_pixels -| thick_pixels) / 2; - const top_left_edge = (width_pixels -| thick_pixels) / 2; - - c_y = left_top_edge -| circle_inner_edge; - c_x = top_left_edge -| circle_inner_edge; - - circle_hemisphere = 1; - - y_min = c_y; - y_max = height_pixels; - - vert_to = 0; - hor_to = 0; - }, - - else => {}, - } - - // store for horizontal+vertical line. - const c_x_pixels = c_x; - const c_y_pixels = c_y; - - // Bring coordinates from pixel-grid to supersampled grid. - c_r *= supersample; - c_x *= supersample; - c_y *= supersample; - - y_min *= supersample; - y_max *= supersample; - - const c_r2 = c_r * c_r; - - // To prevent gaps in the circle, each pixel is sampled multiple times. - // As the quartercircle ends (vertically) in the middle of a pixel, an - // uneven number helps hit that exactly. - { - var i: f64 = @as(f64, @floatFromInt(y_min)) * 16; - while (i <= @as(f64, @floatFromInt(y_max)) * 16) : (i += 1) { - const y = i / 16; - const x = x: { - // circle_hemisphere * sqrt(c_r2 - (y - c_y) * (y - c_y)) + c_x; - const hemi = @as(f64, @floatFromInt(circle_hemisphere)); - const y_part = y - @as(f64, @floatFromInt(c_y)); - const y_squared = y_part * y_part; - const sqrt = @sqrt(c_r2 - y_squared); - const f_c_x = @as(f64, @floatFromInt(c_x)); - - // We need to detect overflows and just skip this i - const a = hemi * sqrt; - const b = a + f_c_x; - - // If the float math didn't work, ignore. - if (std.math.isNan(b)) continue; - - break :x b; - }; - - const row = @as(i32, @intFromFloat(@round(y))); - const col = @as(i32, @intFromFloat(@round(x))); - if (col < 0) continue; - - // rectangle big enough to fit entire circle with radius thick/2. - const row1 = row - @as(i32, @intCast(thick / 2 + 1)); - const row2 = row + @as(i32, @intCast(thick / 2 + 1)); - const col1 = col - @as(i32, @intCast(thick / 2 + 1)); - const col2 = col + @as(i32, @intCast(thick / 2 + 1)); - - const row_start = @min(row1, row2); - const row_end = @max(row1, row2); - const col_start = @min(col1, col2); - const col_end = @max(col1, col2); - - assert(row_end > row_start); - assert(col_end > col_start); - - // draw circle with radius thick/2 around x,y. - // this is accomplished by rejecting pixels where the distance from - // their center to x,y is greater than thick/2. - var r: i32 = @max(row_start, 0); - const r_end = @max(@min(row_end, @as(i32, @intCast(height))), 0); - while (r < r_end) : (r += 1) { - const r_midpoint = @as(f64, @floatFromInt(r)) + 0.5; - - var c: i32 = @max(col_start, 0); - const c_end = @max(@min(col_end, @as(i32, @intCast(width))), 0); - while (c < c_end) : (c += 1) { - const c_midpoint = @as(f64, @floatFromInt(c)) + 0.5; - - // vector from point on quartercircle to midpoint of the current pixel. - const center_midpoint_x = c_midpoint - x; - const center_midpoint_y = r_midpoint - y; - - // distance from current point to circle-center. - const dist = @sqrt(center_midpoint_x * center_midpoint_x + center_midpoint_y * center_midpoint_y); - // skip if midpoint of pixel is outside the circle. - if (dist > @as(f64, @floatFromInt(thick)) / 2) continue; - - // Set our pixel - const idx = @as(usize, @intCast(r * @as(i32, @intCast(width)) + c)); - ss_data[idx] = 0xFF; - } + switch (corner) { + .tl => { + path.moveTo(center_x, 0) catch return; + if (self.height > self.width) { + path.lineTo(center_x, center_y - r) catch return; } - } - } - - // Downsample - { - var r: u32 = 0; - while (r < self.height) : (r += 1) { - var c: u32 = 0; - while (c < self.width) : (c += 1) { - var total: u32 = 0; - var i: usize = 0; - while (i < supersample) : (i += 1) { - var j: usize = 0; - while (j < supersample) : (j += 1) { - const idx = (r * supersample + i) * width + (c * supersample + j); - total += ss_data[idx]; - } - } - - const average = @as(u8, @intCast(@min(total / (supersample * supersample), 0xff))); - canvas.rect( - .{ - .x = @as(i32, @intCast(c)), - .y = @as(i32, @intCast(r)), - .width = 1, - .height = 1, - }, - @as(font.sprite.Color, @enumFromInt(average)), - ); + path.curveTo( + center_x, + center_y - s * r, + center_x - s * r, + center_y, + center_x - r, + center_y, + ) catch return; + if (self.width > self.height) { + path.lineTo(0, center_y) catch return; } - } + }, + .tr => { + path.moveTo(center_x, 0) catch return; + if (self.height > self.width) { + path.lineTo(center_x, center_y - r) catch return; + } + path.curveTo( + center_x, + center_y - s * r, + center_x + s * r, + center_y, + center_x + r, + center_y, + ) catch return; + if (self.width > self.height) { + path.lineTo(float_width, center_y) catch return; + } + }, + .bl => { + path.moveTo(center_x, float_height) catch return; + if (self.height > self.width) { + path.lineTo(center_x, center_y + r) catch return; + } + path.curveTo( + center_x, + center_y + s * r, + center_x - s * r, + center_y, + center_x - r, + center_y, + ) catch return; + if (self.width > self.height) { + path.lineTo(0, center_y) catch return; + } + }, + .br => { + path.moveTo(center_x, float_height) catch return; + if (self.height > self.width) { + path.lineTo(center_x, center_y + r) catch return; + } + path.curveTo( + center_x, + center_y + s * r, + center_x + s * r, + center_y, + center_x + r, + center_y, + ) catch return; + if (self.width > self.height) { + path.lineTo(float_width, center_y) catch return; + } + }, } - - // draw vertical/horizontal lines from quartercircle-edge to box-edge. - self.vline(canvas, @min(c_y_pixels, vert_to), @max(c_y_pixels, vert_to), (width_pixels - thick_pixels) / 2, thick_pixels); - self.hline(canvas, @min(c_x_pixels, hor_to), @max(c_x_pixels, hor_to), (height_pixels - thick_pixels) / 2, thick_pixels); + ctx.stroke(canvas.alloc, path) catch return; } fn draw_dash_horizontal( @@ -1978,12 +2351,13 @@ fn vline( x: u32, thickness_px: u32, ) void { - canvas.rect((font.sprite.Box{ - .x1 = @as(i32, @intCast(@min(@max(x, 0), self.width))), - .x2 = @as(i32, @intCast(@min(@max(x + thickness_px, 0), self.width))), - .y1 = @as(i32, @intCast(@min(@max(y1, 0), self.height))), - .y2 = @as(i32, @intCast(@min(@max(y2, 0), self.height))), - }).rect(), .on); + canvas.rect((font.sprite.Box{ .p0 = .{ + .x = @floatFromInt(@min(@max(x, 0), self.width)), + .y = @floatFromInt(@min(@max(y1, 0), self.height)), + }, .p1 = .{ + .x = @floatFromInt(@min(@max(x + thickness_px, 0), self.width)), + .y = @floatFromInt(@min(@max(y2, 0), self.height)), + } }).rect(), .on); } fn hline( @@ -1994,12 +2368,13 @@ fn hline( y: u32, thickness_px: u32, ) void { - canvas.rect((font.sprite.Box{ - .x1 = @as(i32, @intCast(@min(@max(x1, 0), self.width))), - .x2 = @as(i32, @intCast(@min(@max(x2, 0), self.width))), - .y1 = @as(i32, @intCast(@min(@max(y, 0), self.height))), - .y2 = @as(i32, @intCast(@min(@max(y + thickness_px, 0), self.height))), - }).rect(), .on); + canvas.rect((font.sprite.Box{ .p0 = .{ + .x = @floatFromInt(@min(@max(x1, 0), self.width)), + .y = @floatFromInt(@min(@max(y, 0), self.height)), + }, .p1 = .{ + .x = @floatFromInt(@min(@max(x2, 0), self.width)), + .y = @floatFromInt(@min(@max(y + thickness_px, 0), self.height)), + } }).rect(), .on); } fn rect( @@ -2010,12 +2385,13 @@ fn rect( x2: u32, y2: u32, ) void { - canvas.rect((font.sprite.Box{ - .x1 = @as(i32, @intCast(@min(@max(x1, 0), self.width))), - .y1 = @as(i32, @intCast(@min(@max(y1, 0), self.height))), - .x2 = @as(i32, @intCast(@min(@max(x2, 0), self.width))), - .y2 = @as(i32, @intCast(@min(@max(y2, 0), self.height))), - }).rect(), .on); + canvas.rect((font.sprite.Box{ .p0 = .{ + .x = @floatFromInt(@min(@max(x1, 0), self.width)), + .y = @floatFromInt(@min(@max(y1, 0), self.height)), + }, .p1 = .{ + .x = @floatFromInt(@min(@max(x2, 0), self.width)), + .y = @floatFromInt(@min(@max(y2, 0), self.height)), + } }).rect(), .on); } test "all" { @@ -2073,26 +2449,50 @@ test "render all sprites" { // Symbols for Legacy Computing. cp = 0x1fb00; - while (cp <= 0x1fb9b) : (cp += 1) { + while (cp <= 0x1fbef) : (cp += 1) { switch (cp) { - 0x1FB00...0x1FB3B, - 0x1FB3C...0x1FB40, - 0x1FB47...0x1FB4B, - 0x1FB57...0x1FB5B, - 0x1FB62...0x1FB66, - 0x1FB6C...0x1FB6F, - 0x1FB41...0x1FB45, - 0x1FB4C...0x1FB50, - 0x1FB52...0x1FB56, - 0x1FB5D...0x1FB61, - 0x1FB68...0x1FB6B, - 0x1FB70...0x1FB8B, - 0x1FB46, - 0x1FB51, - 0x1FB5C, - 0x1FB67, - 0x1FB9A, - 0x1FB9B, + // (Block Mosaics / "Sextants") + // 🬀 🬁 🬂 🬃 🬄 🬅 🬆 🬇 🬈 🬉 🬊 🬋 🬌 🬍 🬎 🬏 🬐 🬑 🬒 🬓 🬔 🬕 🬖 🬗 🬘 🬙 🬚 🬛 🬜 🬝 🬞 🬟 🬠 + // 🬡 🬢 🬣 🬤 🬥 🬦 🬧 🬨 🬩 🬪 🬫 🬬 🬭 🬮 🬯 🬰 🬱 🬲 🬳 🬴 🬵 🬶 🬷 🬸 🬹 🬺 🬻 + // (Smooth Mosaics) + // 🬼 🬽 🬾 🬿 🭀 🭁 🭂 🭃 🭄 🭅 🭆 + // 🭇 🭈 🭉 🭊 🭋 🭌 🭍 🭎 🭏 🭐 🭑 + // 🭒 🭓 🭔 🭕 🭖 🭗 🭘 🭙 🭚 🭛 🭜 + // 🭝 🭞 🭟 🭠 🭡 🭢 🭣 🭤 🭥 🭦 🭧 + // 🭨 🭩 🭪 🭫 🭬 🭭 🭮 🭯 + // (Block Elements) + // 🭰 🭱 🭲 🭳 🭴 🭵 🭶 🭷 🭸 🭹 🭺 🭻 + // 🭼 🭽 🭾 🭿 🮀 🮁 + // 🮂 🮃 🮄 🮅 🮆 + // 🮇 🮈 🮉 🮊 🮋 + // (Rectangular Shade Characters) + // 🮌 🮍 🮎 🮏 🮐 🮑 🮒 + 0x1FB00...0x1FB92, + // (Rectangular Shade Characters) + // 🮔 + // (Fill Characters) + // 🮕 🮖 🮗 + // (Diagonal Fill Characters) + // 🮘 🮙 + // (Smooth Mosaics) + // 🮚 🮛 + // (Triangular Shade Characters) + // 🮜 🮝 🮞 🮟 + // (Character Cell Diagonals) + // 🮠 🮡 🮢 🮣 🮤 🮥 🮦 🮧 🮨 🮩 🮪 🮫 🮬 🮭 🮮 + // (Light Solid Line With Stroke) + // 🮯 + 0x1FB94...0x1FBAF, + // (Negative Terminal Characters) + // 🮽 🮾 🮿 + 0x1FBBD...0x1FBBF, + // (Block Elements) + // 🯎 🯏 + // (Character Cell Diagonals) + // 🯐 🯑 🯒 🯓 🯔 🯕 🯖 🯗 🯘 🯙 🯚 🯛 🯜 🯝 🯞 🯟 + // (Geometric Shapes) + // 🯠 🯡 🯢 🯣 🯤 🯥 🯦 🯧 🯨 🯩 🯪 🯫 🯬 🯭 🯮 🯯 + 0x1FBCE...0x1FBEF, => _ = try face.renderGlyph( alloc, &atlas_grayscale, diff --git a/src/font/sprite/Face.zig b/src/font/sprite/Face.zig index adbe9bece..f183192dc 100644 --- a/src/font/sprite/Face.zig +++ b/src/font/sprite/Face.zig @@ -188,29 +188,65 @@ const Kind = enum { => .box, }, - // Box fonts - 0x2500...0x257F, // "Box Drawing" block - 0x2580...0x259F, // "Block Elements" block - 0x2800...0x28FF, // "Braille" block + // == Box fonts == - 0x1FB00...0x1FB3B, // "Symbols for Legacy Computing" block - 0x1FB3C...0x1FB40, - 0x1FB47...0x1FB4B, - 0x1FB57...0x1FB5B, - 0x1FB62...0x1FB66, - 0x1FB6C...0x1FB6F, - 0x1FB41...0x1FB45, - 0x1FB4C...0x1FB50, - 0x1FB52...0x1FB56, - 0x1FB5D...0x1FB61, - 0x1FB68...0x1FB6B, - 0x1FB70...0x1FB8B, - 0x1FB46, - 0x1FB51, - 0x1FB5C, - 0x1FB67, - 0x1FB9A, - 0x1FB9B, + // "Box Drawing" block + // ─ ━ │ ┃ ┄ ┅ ┆ ┇ ┈ ┉ ┊ ┋ ┌ ┍ ┎ ┏ ┐ ┑ ┒ ┓ └ ┕ ┖ ┗ ┘ ┙ ┚ ┛ ├ ┝ ┞ ┟ ┠ + // ┡ ┢ ┣ ┤ ┥ ┦ ┧ ┨ ┩ ┪ ┫ ┬ ┭ ┮ ┯ ┰ ┱ ┲ ┳ ┴ ┵ ┶ ┷ ┸ ┹ ┺ ┻ ┼ ┽ ┾ ┿ ╀ ╁ + // ╂ ╃ ╄ ╅ ╆ ╇ ╈ ╉ ╊ ╋ ╌ ╍ ╎ ╏ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛ ╜ ╝ ╞ ╟ ╠ ╡ ╢ + // ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫ ╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ╴ ╵ ╶ ╷ ╸ ╹ ╺ ╻ ╼ ╽ ╾ ╿ + 0x2500...0x257F, + + // "Block Elements" block + // ▀ ▁ ▂ ▃ ▄ ▅ ▆ ▇ █ ▉ ▊ ▋ ▌ ▍ ▎ ▏ ▐ ░ ▒ ▓ ▔ ▕ ▖ ▗ ▘ ▙ ▚ ▛ ▜ ▝ ▞ ▟ + 0x2580...0x259F, + + // "Braille" block + 0x2800...0x28FF, + + // "Symbols for Legacy Computing" block + // (Block Mosaics / "Sextants") + // 🬀 🬁 🬂 🬃 🬄 🬅 🬆 🬇 🬈 🬉 🬊 🬋 🬌 🬍 🬎 🬏 🬐 🬑 🬒 🬓 🬔 🬕 🬖 🬗 🬘 🬙 🬚 🬛 🬜 🬝 🬞 🬟 🬠 + // 🬡 🬢 🬣 🬤 🬥 🬦 🬧 🬨 🬩 🬪 🬫 🬬 🬭 🬮 🬯 🬰 🬱 🬲 🬳 🬴 🬵 🬶 🬷 🬸 🬹 🬺 🬻 + // (Smooth Mosaics) + // 🬼 🬽 🬾 🬿 🭀 🭁 🭂 🭃 🭄 🭅 🭆 + // 🭇 🭈 🭉 🭊 🭋 🭌 🭍 🭎 🭏 🭐 🭑 + // 🭒 🭓 🭔 🭕 🭖 🭗 🭘 🭙 🭚 🭛 🭜 + // 🭝 🭞 🭟 🭠 🭡 🭢 🭣 🭤 🭥 🭦 🭧 + // 🭨 🭩 🭪 🭫 🭬 🭭 🭮 🭯 + // (Block Elements) + // 🭰 🭱 🭲 🭳 🭴 🭵 🭶 🭷 🭸 🭹 🭺 🭻 + // 🭼 🭽 🭾 🭿 🮀 🮁 + // 🮂 🮃 🮄 🮅 🮆 + // 🮇 🮈 🮉 🮊 🮋 + // (Rectangular Shade Characters) + // 🮌 🮍 🮎 🮏 🮐 🮑 🮒 + 0x1FB00...0x1FB92, + // (Rectangular Shade Characters) + // 🮔 + // (Fill Characters) + // 🮕 🮖 🮗 + // (Diagonal Fill Characters) + // 🮘 🮙 + // (Smooth Mosaics) + // 🮚 🮛 + // (Triangular Shade Characters) + // 🮜 🮝 🮞 🮟 + // (Character Cell Diagonals) + // 🮠 🮡 🮢 🮣 🮤 🮥 🮦 🮧 🮨 🮩 🮪 🮫 🮬 🮭 🮮 + // (Light Solid Line With Stroke) + // 🮯 + 0x1FB94...0x1FBAF, + // (Negative Terminal Characters) + // 🮽 🮾 🮿 + 0x1FBBD...0x1FBBF, + // (Block Elements) + // 🯎 🯏 + // (Character Cell Diagonals) + // 🯐 🯑 🯒 🯓 🯔 🯕 🯖 🯗 🯘 🯙 🯚 🯛 🯜 🯝 🯞 🯟 + // (Geometric Shapes) + // 🯠 🯡 🯢 🯣 🯤 🯥 🯦 🯧 🯨 🯩 🯪 🯫 🯬 🯭 🯮 🯯 + 0x1FBCE...0x1FBEF, => .box, // Powerline fonts diff --git a/src/font/sprite/Powerline.zig b/src/font/sprite/Powerline.zig index f32fdb01b..ba56eb38a 100644 --- a/src/font/sprite/Powerline.zig +++ b/src/font/sprite/Powerline.zig @@ -11,7 +11,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const font = @import("../main.zig"); -const Trapezoid = @import("canvas.zig").Trapezoid; +const Quad = @import("canvas.zig").Quad; const log = std.log.scoped(.powerline_font); @@ -176,10 +176,10 @@ fn draw_wedge_triangle(self: Powerline, canvas: *font.sprite.Canvas, cp: u32) !v else => unreachable, } - canvas.triangle(.{ - .p1 = .{ .x = @as(i32, @intCast(p1_x)), .y = @as(i32, @intCast(p1_y)) }, - .p2 = .{ .x = @as(i32, @intCast(p2_x)), .y = @as(i32, @intCast(p2_y)) }, - .p3 = .{ .x = @as(i32, @intCast(p3_x)), .y = @as(i32, @intCast(p3_y)) }, + try canvas.triangle(.{ + .p0 = .{ .x = @floatFromInt(p1_x), .y = @floatFromInt(p1_y) }, + .p1 = .{ .x = @floatFromInt(p2_x), .y = @floatFromInt(p2_y) }, + .p2 = .{ .x = @floatFromInt(p3_x), .y = @floatFromInt(p3_y) }, }, .on); } @@ -391,8 +391,8 @@ fn draw_half_circle(self: Powerline, alloc: Allocator, canvas: *font.sprite.Canv const average = @as(u8, @intCast(@min(total / (supersample * supersample), 0xFF))); canvas.rect( .{ - .x = @intCast(c), - .y = @intCast(r), + .x = @floatFromInt(c), + .y = @floatFromInt(r), .width = 1, .height = 1, }, @@ -404,110 +404,86 @@ fn draw_half_circle(self: Powerline, alloc: Allocator, canvas: *font.sprite.Canv } fn draw_trapezoid_top_bottom(self: Powerline, canvas: *font.sprite.Canvas, cp: u32) !void { - const t_top: Trapezoid = if (cp == 0xE0D4) + const t_top: Quad = if (cp == 0xE0D4) .{ - .top = 0, - .bottom = @intCast(self.height / 2 - self.height / 20), - .left = .{ - .p1 = .{ - .x = 0, - .y = 0, - }, - .p2 = .{ - .x = @intCast(self.width - self.width / 3), - .y = @intCast(self.height / 2 - self.height / 20), - }, + .p0 = .{ + .x = 0, + .y = 0, }, - .right = .{ - .p1 = .{ - .x = @intCast(self.width), - .y = 0, - }, - .p2 = .{ - .x = @intCast(self.width), - .y = @intCast(self.height / 2 - self.height / 20), - }, + .p1 = .{ + .x = @floatFromInt(self.width - self.width / 3), + .y = @floatFromInt(self.height / 2 - self.height / 20), + }, + .p2 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height / 2 - self.height / 20), + }, + .p3 = .{ + .x = @floatFromInt(self.width), + .y = 0, }, } else .{ - .top = 0, - .bottom = @intCast(self.height / 2 - self.height / 20), - .left = .{ - .p1 = .{ - .x = 0, - .y = 0, - }, - .p2 = .{ - .x = 0, - .y = @intCast(self.height / 2 - self.height / 20), - }, + .p0 = .{ + .x = 0, + .y = 0, }, - .right = .{ - .p1 = .{ - .x = @intCast(self.width), - .y = 0, - }, - .p2 = .{ - .x = @intCast(self.width / 3), - .y = @intCast(self.height / 2 - self.height / 20), - }, + .p1 = .{ + .x = 0, + .y = @floatFromInt(self.height / 2 - self.height / 20), + }, + .p2 = .{ + .x = @floatFromInt(self.width / 3), + .y = @floatFromInt(self.height / 2 - self.height / 20), + }, + .p3 = .{ + .x = @floatFromInt(self.width), + .y = 0, }, }; - const t_bottom: Trapezoid = if (cp == 0xE0D4) + const t_bottom: Quad = if (cp == 0xE0D4) .{ - .top = @intCast(self.height / 2 + self.height / 20), - .bottom = @intCast(self.height), - .left = .{ - .p1 = .{ - .x = @intCast(self.width - self.width / 3), - .y = @intCast(self.height / 2 + self.height / 20), - }, - .p2 = .{ - .x = 0, - .y = @intCast(self.height), - }, + .p0 = .{ + .x = @floatFromInt(self.width - self.width / 3), + .y = @floatFromInt(self.height / 2 + self.height / 20), }, - .right = .{ - .p1 = .{ - .x = @intCast(self.width), - .y = @intCast(self.height / 2 + self.height / 20), - }, - .p2 = .{ - .x = @intCast(self.width), - .y = @intCast(self.height), - }, + .p1 = .{ + .x = 0, + .y = @floatFromInt(self.height), + }, + .p2 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height), + }, + .p3 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height / 2 + self.height / 20), }, } else .{ - .top = @intCast(self.height / 2 + self.height / 20), - .bottom = @intCast(self.height), - .left = .{ - .p1 = .{ - .x = 0, - .y = @intCast(self.height / 2 + self.height / 20), - }, - .p2 = .{ - .x = 0, - .y = @intCast(self.height), - }, + .p0 = .{ + .x = 0, + .y = @floatFromInt(self.height / 2 + self.height / 20), }, - .right = .{ - .p1 = .{ - .x = @intCast(self.width / 3), - .y = @intCast(self.height / 2 + self.height / 20), - }, - .p2 = .{ - .x = @intCast(self.width), - .y = @intCast(self.height), - }, + .p1 = .{ + .x = 0, + .y = @floatFromInt(self.height), + }, + .p2 = .{ + .x = @floatFromInt(self.width), + .y = @floatFromInt(self.height), + }, + .p3 = .{ + .x = @floatFromInt(self.width / 3), + .y = @floatFromInt(self.height / 2 + self.height / 20), }, }; - canvas.trapezoid(t_top); - canvas.trapezoid(t_bottom); + try canvas.quad(t_top, .on); + try canvas.quad(t_bottom, .on); } test "all" { diff --git a/src/font/sprite/canvas.zig b/src/font/sprite/canvas.zig index 67e213b21..a3792fe23 100644 --- a/src/font/sprite/canvas.zig +++ b/src/font/sprite/canvas.zig @@ -3,340 +3,88 @@ const std = @import("std"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; -const js = @import("zig-js"); -const pixman = @import("pixman"); +const z2d = @import("z2d"); const font = @import("../main.zig"); pub const Point = struct { - x: i32, - y: i32, + x: f64, + y: f64, }; pub const Line = struct { + p0: Point, p1: Point, - p2: Point, }; pub const Box = struct { - x1: i32, - y1: i32, - x2: i32, - y2: i32, + p0: Point, + p1: Point, pub fn rect(self: Box) Rect { - const tl_x = @min(self.x1, self.x2); - const tl_y = @min(self.y1, self.y2); - const br_x = @max(self.x1, self.x2); - const br_y = @max(self.y1, self.y2); + const tl_x = @min(self.p0.x, self.p1.x); + const tl_y = @min(self.p0.y, self.p1.y); + const br_x = @max(self.p0.x, self.p1.x); + const br_y = @max(self.p0.y, self.p1.y); return .{ .x = tl_x, .y = tl_y, - .width = @intCast(br_x - tl_x), - .height = @intCast(br_y - tl_y), + .width = br_x - tl_x, + .height = br_y - tl_y, }; } }; pub const Rect = struct { - x: i32, - y: i32, - width: u32, - height: u32, + x: f64, + y: f64, + width: f64, + height: f64, }; pub const Triangle = struct { + p0: Point, + p1: Point, + p2: Point, +}; + +pub const Quad = struct { + p0: Point, p1: Point, p2: Point, p3: Point, }; -pub const Trapezoid = struct { - top: i32, - bottom: i32, - left: Line, - right: Line, -}; - /// We only use alpha-channel so a pixel can only be "on" or "off". pub const Color = enum(u8) { - const CSS_BUF_MAX = 24; - on = 255, off = 0, _, - - fn pixmanColor(self: Color) pixman.Color { - // pixman uses u16 for color while our color value is u8 so we - // scale it up proportionally. - const max = @as(f32, @floatFromInt(std.math.maxInt(u8))); - const max_u16 = @as(f32, @floatFromInt(std.math.maxInt(u16))); - const unscaled = @as(f32, @floatFromInt(@intFromEnum(self))); - const scaled = @as(u16, @intFromFloat((unscaled * max_u16) / max)); - return .{ .red = 0, .green = 0, .blue = 0, .alpha = scaled }; - } - - fn cssColor(self: Color, buf: []u8) ![]u8 { - return try std.fmt.bufPrint(buf, "rgba(0, 0, 0, {:.2})", .{ - @as(f32, @floatFromInt(@intFromEnum(self))) / 255, - }); - } }; -/// Composition operations that are supported. -pub const CompositionOp = enum { - // Note: more can be added here as needed. +pub const Canvas = struct { + /// The underlying z2d surface. + sfc: z2d.Surface, - source_out, - - fn pixmanOp(self: CompositionOp) pixman.Op { - return switch (self) { - .source_out => .out, - }; - } - - fn jsOp(self: CompositionOp) js.String { - return switch (self) { - .source_out => js.string("source-out"), - }; - } -}; - -pub const Canvas = switch (font.options.backend) { - .web_canvas => WebCanvasImpl, - else => PixmanImpl, -}; - -const WebCanvasImpl = struct { - /// The canvas element that is our final image. - canvas: js.Object, - - /// Store the dimensions for easy access later. - width: u32, - height: u32, - - pub fn init(alloc: Allocator, width: u32, height: u32) !WebCanvasImpl { - _ = alloc; - - // Create our canvas that we're going to continue to reuse. - const doc = try js.global.get(js.Object, "document"); - defer doc.deinit(); - const canvas = try doc.call(js.Object, "createElement", .{js.string("canvas")}); - errdefer canvas.deinit(); - - // Set our dimensions. - try canvas.set("width", width); - try canvas.set("height", height); - - return WebCanvasImpl{ - .canvas = canvas, - .width = width, - .height = height, - }; - } - - pub fn deinit(self: *WebCanvasImpl, alloc: Allocator) void { - _ = alloc; - self.canvas.deinit(); - self.* = undefined; - } - - pub fn pixel(self: *WebCanvasImpl, x: u32, y: u32, color: Color) void { - const ctx = self.context(color) catch return; - defer ctx.deinit(); - ctx.call(void, "fillRect", .{ x, y, 1, 1 }) catch return; - } - - pub fn rect(self: *WebCanvasImpl, v: Rect, color: Color) void { - const ctx = self.context(color) catch return; - defer ctx.deinit(); - ctx.call(void, "fillRect", .{ - @as(u32, @intCast(v.x)), - @as(u32, @intCast(v.y)), - v.width, - v.height, - }) catch return; - } - - pub fn trapezoid(self: *WebCanvasImpl, t: Trapezoid) void { - const ctx = self.context(.on) catch return; - defer ctx.deinit(); - - ctx.call(void, "beginPath", .{}) catch return; - ctx.call(void, "moveTo", .{ t.left.p1.x, t.left.p1.y }) catch return; - ctx.call(void, "lineTo", .{ t.right.p1.x, t.right.p1.y }) catch return; - ctx.call(void, "lineTo", .{ t.right.p2.x, t.right.p2.y }) catch return; - ctx.call(void, "lineTo", .{ t.left.p2.x, t.left.p2.y }) catch return; - ctx.call(void, "fill", .{}) catch return; - } - - pub fn triangle(self: *WebCanvasImpl, t: Triangle, color: Color) void { - const ctx = self.context(color) catch return; - defer ctx.deinit(); - - ctx.call(void, "beginPath", .{}) catch return; - ctx.call(void, "moveTo", .{ t.p1.x, t.p1.y }) catch return; - ctx.call(void, "lineTo", .{ t.p2.x, t.p2.y }) catch return; - ctx.call(void, "lineTo", .{ t.p3.x, t.p3.y }) catch return; - ctx.call(void, "fill", .{}) catch return; - } - - pub fn composite( - self: *WebCanvasImpl, - op: CompositionOp, - src: *const WebCanvasImpl, - dest: Rect, - ) void { - const ctx = self.context(Color.on) catch return; - defer ctx.deinit(); - - // Set our compositing operation - ctx.set("globalCompositeOperation", op.jsOp()) catch return; - - // Composite - ctx.call(void, "drawImage", .{ - src.canvas, - dest.x, - dest.y, - dest.width, - dest.height, - }) catch return; - } - - fn context(self: WebCanvasImpl, fill: ?Color) !js.Object { - const ctx = try self.canvas.call(js.Object, "getContext", .{js.string("2d")}); - errdefer ctx.deinit(); - - // Reset our composite operation - try ctx.set("globalCompositeOperation", js.string("source-over")); - - // Set our fill color - if (fill) |c| { - var buf: [Color.CSS_BUF_MAX]u8 = undefined; - const color = try c.cssColor(&buf); - try ctx.set("fillStyle", js.string(color)); - } - - return ctx; - } - - pub fn writeAtlas(self: *WebCanvasImpl, alloc: Allocator, atlas: *font.Atlas) !font.Atlas.Region { - assert(atlas.format == .grayscale); - - // Reload our context since we resized the canvas - const ctx = try self.context(null); - defer ctx.deinit(); - - // Set our width/height. Set to vars in case we just query the canvas later. - const width = self.width; - const height = self.height; - - // Read the image data and get it into a []u8 on our side - const bitmap: []u8 = bitmap: { - // Read the raw bitmap data and get the "data" value which is a - // Uint8ClampedArray. - const data = try ctx.call(js.Object, "getImageData", .{ 0, 0, width, height }); - defer data.deinit(); - const src_array = try data.get(js.Object, "data"); - defer src_array.deinit(); - - // Allocate our local memory to copy the data to. - const len = try src_array.get(u32, "length"); - const bitmap = try alloc.alloc(u8, @intCast(len)); - errdefer alloc.free(bitmap); - - // Create our target Uint8Array that we can use to copy from src. - const mem_array = mem_array: { - // Get our runtime memory - const mem = try js.runtime.get(js.Object, "memory"); - defer mem.deinit(); - const buf = try mem.get(js.Object, "buffer"); - defer buf.deinit(); - - // Construct our array to peer into our memory - const Uint8Array = try js.global.get(js.Object, "Uint8Array"); - defer Uint8Array.deinit(); - const mem_array = try Uint8Array.new(.{ buf, bitmap.ptr }); - errdefer mem_array.deinit(); - - break :mem_array mem_array; - }; - defer mem_array.deinit(); - - // Copy - try mem_array.call(void, "set", .{src_array}); - - break :bitmap bitmap; - }; - errdefer alloc.free(bitmap); - - // Convert the format of the bitmap to A8 since the raw canvas data - // is in RGBA. - // NOTE(mitchellh): do we need a 1px buffer to avoid artifacts? - const bitmap_a8: []u8 = a8: { - assert(@mod(bitmap.len, 4) == 0); - assert(bitmap.len == width * height * 4); - var bitmap_a8 = try alloc.alloc(u8, bitmap.len / 4); - errdefer alloc.free(bitmap_a8); - var i: usize = 0; - while (i < bitmap_a8.len) : (i += 1) { - bitmap_a8[i] = bitmap[(i * 4) + 3]; - } - - break :a8 bitmap_a8; - }; - defer alloc.free(bitmap_a8); - - // Write the glyph information into the atlas - const region = try atlas.reserve(alloc, width, height); - if (region.width > 0 and region.height > 0) { - assert(region.width == width); - assert(region.height == height); - atlas.set(region, bitmap_a8); - } - - return region; - } -}; - -const PixmanImpl = struct { - /// The underlying image. - image: *pixman.Image, - - /// The raw data buffer. - data: []u32, + alloc: Allocator, pub fn init(alloc: Allocator, width: u32, height: u32) !Canvas { - // Determine the config for our image buffer. The images we draw - // for boxes are always 8bpp - const format: pixman.FormatCode = .a8; - const stride = format.strideForWidth(width); - const len = @as(usize, @intCast(stride * @as(c_int, @intCast(height)))); - - // Allocate our buffer. pixman uses []u32 so we divide our length - // by 4 since u32 / u8 = 4. - const data = try alloc.alloc(u32, len / 4); - errdefer alloc.free(data); - @memset(data, 0); - - // Create the image we'll draw to - const img = try pixman.Image.createBitsNoClear( - format, + // Create the surface we'll be using. + const sfc = try z2d.Surface.initPixel( + .{ .alpha8 = .{ .a = 0 } }, + alloc, @intCast(width), @intCast(height), - data.ptr, - stride, ); - errdefer _ = img.unref(); - return Canvas{ - .image = img, - .data = data, + return .{ + .sfc = sfc, + .alloc = alloc, }; } pub fn deinit(self: *Canvas, alloc: Allocator) void { - alloc.free(self.data); - _ = self.image.unref(); + _ = alloc; + self.sfc.deinit(); self.* = undefined; } @@ -344,8 +92,8 @@ const PixmanImpl = struct { pub fn writeAtlas(self: *Canvas, alloc: Allocator, atlas: *font.Atlas) !font.Atlas.Region { assert(atlas.format == .grayscale); - const width = @as(u32, @intCast(self.image.getWidth())); - const height = @as(u32, @intCast(self.image.getHeight())); + const width = @as(u32, @intCast(self.sfc.getWidth())); + const height = @as(u32, @intCast(self.sfc.getHeight())); // Allocate our texture atlas region const region = region: { @@ -372,31 +120,7 @@ const PixmanImpl = struct { }; if (region.width > 0 and region.height > 0) { - const depth = atlas.format.depth(); - - // Convert our []u32 to []u8 since we use 8bpp formats - const stride = self.image.getStride(); - const data = @as([*]u8, @ptrCast(self.data.ptr))[0 .. self.data.len * 4]; - - // We can avoid a buffer copy if our atlas width and bitmap - // width match and the bitmap pitch is just the width (meaning - // the data is tightly packed). - const needs_copy = !(width * depth == stride); - - // If we need to copy the data, we copy it into a temporary buffer. - const buffer = if (needs_copy) buffer: { - const temp = try alloc.alloc(u8, width * height * depth); - var dst_ptr = temp; - var src_ptr = data.ptr; - var i: usize = 0; - while (i < height) : (i += 1) { - @memcpy(dst_ptr[0 .. width * depth], src_ptr[0 .. width * depth]); - dst_ptr = dst_ptr[width * depth ..]; - src_ptr += @as(usize, @intCast(stride)); - } - break :buffer temp; - } else data[0..(width * height * depth)]; - defer if (buffer.ptr != data.ptr) alloc.free(buffer); + const buffer: []u8 = @ptrCast(self.sfc.image_surface_alpha8.buf); // Write the glyph information into the atlas assert(region.width == width); @@ -409,102 +133,105 @@ const PixmanImpl = struct { /// Draw and fill a single pixel pub fn pixel(self: *Canvas, x: u32, y: u32, color: Color) void { - if (comptime std.debug.runtime_safety) { - assert(x < self.image.getWidth()); - assert(y < self.image.getHeight()); - } - - const boxes = &[_]pixman.Box32{ - .{ - .x1 = @intCast(x), - .y1 = @intCast(y), - .x2 = @intCast(x + 1), - .y2 = @intCast(y + 1), - }, + self.sfc.putPixel( + @intCast(x), + @intCast(y), + .{ .alpha8 = .{ .a = @intFromEnum(color) } }, + ) catch { + // If we try to set out of range this will fail. + // We just silently ignore that. }; - - self.image.fillBoxes(.src, color.pixmanColor(), boxes) catch {}; } /// Draw and fill a rectangle. This is the main primitive for drawing /// lines as well (which are just generally skinny rectangles...) pub fn rect(self: *Canvas, v: Rect, color: Color) void { - const boxes = &[_]pixman.Box32{ - .{ - .x1 = @intCast(v.x), - .y1 = @intCast(v.y), - .x2 = @intCast(v.x + @as(i32, @intCast(v.width))), - .y2 = @intCast(v.y + @as(i32, @intCast(v.height))), + const x0: usize = @intFromFloat(v.x); + const x1: usize = @intFromFloat(v.x + v.width); + const y0: usize = @intFromFloat(v.y); + const y1: usize = @intFromFloat(v.y + v.height); + + for (y0..y1) |y| { + for (x0..x1) |x| { + self.pixel( + @intCast(x), + @intCast(y), + color, + ); + } + } + } + + /// Draw and fill a quad. + pub fn quad(self: *Canvas, q: Quad, color: Color) !void { + var ctx: z2d.Context = .{ + .surface = self.sfc, + .pattern = .{ + .opaque_pattern = .{ + .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } }, + }, }, }; - if (comptime std.debug.runtime_safety) { - assert(boxes[0].x1 >= 0); - assert(boxes[0].y1 >= 0); - assert(boxes[0].x2 <= @as(i32, @intCast(self.image.getWidth()))); - assert(boxes[0].y2 <= @as(i32, @intCast(self.image.getHeight()))); - } + var path = z2d.Path.init(self.alloc); + defer path.deinit(); - self.image.fillBoxes(.src, color.pixmanColor(), boxes) catch {}; - } + try path.moveTo(q.p0.x, q.p0.y); + try path.lineTo(q.p1.x, q.p1.y); + try path.lineTo(q.p2.x, q.p2.y); + try path.lineTo(q.p3.x, q.p3.y); + try path.close(); - /// Draw and fill a trapezoid. - pub fn trapezoid(self: *Canvas, t: Trapezoid) void { - self.image.rasterizeTrapezoid(.{ - .top = pixman.Fixed.init(t.top), - .bottom = pixman.Fixed.init(t.bottom), - .left = .{ - .p1 = .{ - .x = pixman.Fixed.init(t.left.p1.x), - .y = pixman.Fixed.init(t.left.p1.y), - }, - .p2 = .{ - .x = pixman.Fixed.init(t.left.p2.x), - .y = pixman.Fixed.init(t.left.p2.y), - }, - }, - .right = .{ - .p1 = .{ - .x = pixman.Fixed.init(t.right.p1.x), - .y = pixman.Fixed.init(t.right.p1.y), - }, - .p2 = .{ - .x = pixman.Fixed.init(t.right.p2.x), - .y = pixman.Fixed.init(t.right.p2.y), - }, - }, - }, 0, 0); + try ctx.fill(self.alloc, path); } /// Draw and fill a triangle. - pub fn triangle(self: *Canvas, t: Triangle, color: Color) void { - const tris = &[_]pixman.Triangle{ - .{ - .p1 = .{ .x = pixman.Fixed.init(t.p1.x), .y = pixman.Fixed.init(t.p1.y) }, - .p2 = .{ .x = pixman.Fixed.init(t.p2.x), .y = pixman.Fixed.init(t.p2.y) }, - .p3 = .{ .x = pixman.Fixed.init(t.p3.x), .y = pixman.Fixed.init(t.p3.y) }, + pub fn triangle(self: *Canvas, t: Triangle, color: Color) !void { + var ctx: z2d.Context = .{ + .surface = self.sfc, + .pattern = .{ + .opaque_pattern = .{ + .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } }, + }, }, }; - const src = pixman.Image.createSolidFill(color.pixmanColor()) catch return; - defer _ = src.unref(); - self.image.compositeTriangles(.over, src, .a8, 0, 0, 0, 0, tris); + var path = z2d.Path.init(self.alloc); + defer path.deinit(); + + try path.moveTo(t.p0.x, t.p0.y); + try path.lineTo(t.p1.x, t.p1.y); + try path.lineTo(t.p2.x, t.p2.y); + try path.close(); + + try ctx.fill(self.alloc, path); } - /// Composite one image on another. - pub fn composite(self: *Canvas, op: CompositionOp, src: *const Canvas, dest: Rect) void { - self.image.composite( - op.pixmanOp(), - src.image, - null, - 0, - 0, - 0, - 0, - @intCast(dest.x), - @intCast(dest.y), - @intCast(dest.width), - @intCast(dest.height), - ); + /// Stroke a line. + pub fn line(self: *Canvas, l: Line, thickness: f64, color: Color) !void { + var ctx: z2d.Context = .{ + .surface = self.sfc, + .pattern = .{ + .opaque_pattern = .{ + .pixel = .{ .alpha8 = .{ .a = @intFromEnum(color) } }, + }, + }, + .line_width = thickness, + .line_cap_mode = .round, + }; + + var path = z2d.Path.init(self.alloc); + defer path.deinit(); + + try path.moveTo(l.p0.x, l.p0.y); + try path.lineTo(l.p1.x, l.p1.y); + + try ctx.stroke(self.alloc, path); + } + + pub fn invert(self: *Canvas) void { + for (std.mem.sliceAsBytes(self.sfc.image_surface_alpha8.buf)) |*v| { + v.* = 255 - v.*; + } } }; diff --git a/src/font/sprite/testdata/Box.ppm b/src/font/sprite/testdata/Box.ppm index 676b07ebea775e35eb23bf1d5095343bb53d5300..c21952561269dd20ed23a27cc122b77191d3d959 100644 GIT binary patch literal 1048593 zcmeFa+i@(nwk*o;{Udq6>49`k-2L73K=Od}K+A;f!CD@W9!MTEJ&@u!!0CbXbt29e zFP@1UKvosHQQb|>i3&wmB1et~27rE%oci^@|LcGK{J;OdFaF`b{`Y_X_kZ2ZYj$9E zV0K`3V0K`3V0K`$1MlyR9((IVU+x5_dZjq!t2pJWIOVIjUSFkC z+(?cQUsa3Zx^wKkU#=Cea}Rcx^U>pVPh@AI=u7Y3>fM*Oinn*(z2|$C`gYzfR|_Zh zrR;xiK_5L{?>zR_iN5T;FK-oZ@4RyV4fO9S_3gc1t`$%8RrbHPo{t`{cOHA|L|^va zm$!z%Ui|UJFnb-1O2;7eS7bh zYsC|NmHqFnXYY8uc|W>N^kwgTd8>GP=au_!pnq4X@1yU(rTRo)W&fA_Yku!|b^Omd z7Jb=!U*0O--g)Kz8|dFv>idTFrR;z2{KPs9#p})c(RHFPd+*Cz#oIft+%E(DyGnf@ zeg7@hC;BS;zvMspz7B{!@4YW?6>smna{mqV?<)26?ShY|5`C5Zzg6WQy}sVQhsmna{mqV?<)26?t_n} z6MdEazg68Iy}sVQhq=&SP(#=*!;w@>cQo&MWudK>w~%-`@M> zTJc0*W&eBY**jiu-jA*mec5|o-YVYSdFB2a=-*Z9`{?^`sXozH+5aW~n%_HK9sjeA zMPK&bm$!Xt?Bu60GA?U^mQHjs*4^f`i?dn+t53d^!0K%5LryrQE(Ny+ipIr?Nq)~6QC0sKKm^{RxZ=@;e3Ez-d1femp-M@HD}&WIp-D*#Veu`& zv$5DR!|MXB$Vv#ql>vOH7R4fd`UTD zxoa+Wlg>t=m~??+SUiS+7)5*bb@>4^@TFv&d;{azXi38~X7zTw6xk<*1} z8LkXqr=BWqAyAP9)ET$P7Qz-mNwCf>1S;;k)|bMTlGf92>WZKgyv_|KJbwbIEPdWe z%^B^lD;=^ZXDs((r*n`=FUwfpqktcw6Rw@{LOAbkLYP6!BI602I=9N#bLqk~OcB{C za(EM*imq@l3Q+XISg0af2wMcjNui=^1JN|#TJ)eyUlQwOMG(;vmqJ3G#zw9QJjRh;rw+|B#ARX}I|hI$}!s9q^f>E7${8S5LVS@rLvwr{XD2#Zx-PDc=!J<>=*3+Dx5ma#GA& zh{Zfq|GRs*cf4EldS0d#ZY`IF+}TQ}Gn1;whctlL-k5=DsL~R;wetWQ#!>d-w{sb=;cn@L!E2V9dqw^y@^F$ z)&K4u?j5gp9=FnU&Uf#g?!6wLvA&U-RsT-fLSo&AV%{pA$UD+k<*PW=E5)6(CbtlG z_HU>MB8Tdg;#A&VPQ_E4il=mnQ@$gd%F)Z6w0%0)q&w!V;)%Sf|J^;@J6?AKZzSxT z@7_J#dp$m5eIqri{++ag#JUf~yj47rccib%S8=LWiaTjdZXxdM-%t-k4%I8gsl2_M zil;agPw5n=d`CEyqnA5r`*f~Jcg$PG6M0qtyL-5IyzU0xNZ2{wy?eU%dVI$EMrv05 zJ81`rbsvg(t9T;sNMDt&;#98`chZ{NLfqNEp&p1Fs#l6rd3!k(PjM=q(kV{)j&LeR zFL%=R>0FbOV%|b5=AruE-NU`(-J&O_l3owDq`xlf9D3vuWE zRsB+&>Wku(ui|del9O_F_HU>MB8Tdg;#A&VPQ_E4il=mnQ@$gd%F)Y>uDi-}Qp{V3 z#oki=S94RG@>QJjRopFFa#GID{tfj&6!0f>6!0f=c-2w6b>sIek)O(qcT9j{Z4yEhmR9?lYc)dF1tGMpXEA~NW zJnt#IyEE@AY%5XqDNfBo=@fT6-_h>*+0@qnr}oSUCwJD)^{E`!s9ELMy_R}ym&T%} zvcA^edkXJXcon9tMAfG_H4CLv-0gfvo9bs%Ujv-lGb5bbSv%LKa$KWkm1Fl>>a|@O zi=N8*T7T~;yj$T_n6?sCpW@UklumKC^BrxfpG|!YaB9ztaB^quT%XEujha=C-D|1W zc4;hnD(h?gy{GVQg;!zPN>qJ{Q?pPy#of+#w5fhJ^)p`2ceJT~HuW{YsXa5o$(^-x zeJaN_YF0URuccnwrLpL#tgrR=p2E8oUWI8ZQS~WK%|huEcRSzFrux~`*8r#X%m^oU z*3R{*9M`B><=DNJdTp1+qNlRH*57*y?^bvfrmaNPr#LkWrBmGPd`Fw=XH#DToZ2%Z zoZMME*Qat^qh^(3_gd<;T^fs?%KBP=?Nn5>=n#)GU-vakujwZK|J5eGPDG z&x~+#XYE{{%5jaFRgT?jsn>RCEP5*IYyG{a@NR`yVcJSmeTq}FP&&ok&Udt_em3@QhmU?ZM#-gXPzSiG+3h!2U6{f93)u%W$3#C)s?R-a@ z>St451Dx73Bb?k>JJ+XjT%%@{WA|F>wcUTL&%^$C`XaV_L)E9a+nwQ&n)|x4tZH+s zK6G2Q z=CYD)1!t61VF}G3DZnL=9o?P#km(}lTC$b7tYllk8D*8NFS8<^cg$wmmxW8loFK@} z0Nw!@6Acp7Js_QpX5?moc(p?u2eXF&TGz5vOO?Ynikg*Ni@uFjn4*eN^0+7z$|`@a z?ZuWYsy@!f{mVZ2Zhdp?zWG|;7;(S6xt4OzHMjSo$%yAXx!eI>ozKqYCoQaW*cM?{ z9s#twpeRX8fJmvJT8K*~a9pZ_MaQ7MvmODbqt%w<>R8R(WM_tBfrBW~Ri* zo~_DTm@PicF8jAC&*Uh}eoJ|6HS7Mxn)Bs*wB%8_XAgT{v$L(>jBK1d%2s#JRK6P% z4HC>U+l8Vl_Ket8a7I~W<0aOMiXbAxA=PSvM$;A)35tuWW1V@=zF zuz4X#f$5e_ai8Z=z87I(+U`f+sOA1;pM1ADa_lzVT8tda-*W_i=MigKY`v_#*fMWx zjx1+e%WHjeZ0}K`XExu9g>{*AcQ2&j=YXo@ojKJP#ZmW5??f%iS8*zy(v5JX&<(UT z4_}16azO9ddKc}QF8W4o+@Kj}uXwjPa_qjmHd2n4V{irSEUa(wX_dv6eQeD+Z<%F5 zDjLr%$5vdnl^7~-$nK@NxKDFeCGX6sz9>%Ya$XU%RQW1S#Z$Tw4n=JN(NmRIgz=uO zw`OW~e_v7HRj7Jqu_raV|9tBVI&Ww}=XzeHUEN9A`KjGD#Hqe0PV91-SNSSV#Z$Tw zZlupDuZVZ6J+1SK0Ogi#Hqe0 zPMuf9DPP5n_^Nn{>+ZJbb7#DfeYwAH_k?#Aw*6CcQ=HnnN~gG9-!R$E_1w8JgW>$F z>uYCD^+j>&yedxlDsIGA#Zz3}y2Y{RX=l8VeYwBy;sh^jX#1z;rnufSJmMQB-?^R_ zZq$k4+rFxP4RNY3ic{xRamrV5Bfcu0;<~#n`rH|BWMA&@+dbi(g>C=T+!UwwuF@&4 z*EdYIb3J!%%wRY_>-yT6Q+-jKI-7zj?Oe~D8#5Tr&$_;L=2Tx4r_QV5l&|7Od{sQf)va3`i=KAI z8`+op`z}uK!iKhgYHo__J;NiuVe*~pdErK#7{2YR>empb`l2{>UKOW&6*uCm;wi4X z+oI2%@kaLL{=VH4-dWi8Pt8qnYVRtY;(C3uYCD z^+j>&yedxlDsIGA#Zz3}x(kd)_T~P*7nopI$aYVvxhbyq43GGRDcop(bhU3$)b>^N zYlu^QQJgxjic`Ld8}U`~6t~#d{R|`fa)00b=5<=6cAu)bDNgNOrBht5Z` ztkr(j^|dpn`l2{>UKOW&6*uCm;wi3f-37)Y`*MHZ3rw&pWVG{>kMb|V%q}r<+_#%WdY#45>s=r zItZvS;AjGgBPPW&Ox(2I6k+IFlFK7T|=0SCFDCfOIx7jxNO_L*U`L(;$oz9%GK#MT=i@q%2>`J#M_kLG25yW6rJHrUTj-{6B6z$ zMF{}u9AfN2%lR^lWS)Pl4Eh3Q8?^fU7=`vYJz&BX*L0RU$cjGrS70`vu0E|y3fY6b@ezQiQXFrKRj z9#Y_W{%vF0&c#(^)hSr5gW^I=9C3BPiGyyrlEoa$E^8LQW7g~^+d9LUyx6t?C#T)b z0VM$7Y=ZG~q(Okb0L#Uah(nFwz`&Q7#2LnO6~RLaJkP&vOxwA*imW;Xt94MEkGXNe zaq3C8Sjtk4MTd2R-!bco6Rn-$OkQkTfD@DMC`SnZIGbSn9BB}sFTrxLDB@5hI56-f zCUJ)GTt)DZ0?+er8`E|!t|F^W!D<~8=VRi4o2LR~!?7nFj5S!wQR_O#e#dl5`(&Nr zOkQkTfWBO#Dx?FzIRxY9LxTic&evjfu{z>VE;umoB_?r(@mxjlkOI&1ZyVEgF0L%A zPQhv&6z5{%eEaz7j3>_MmTFnZu_WK({f^nvoMi6|XYyj(0-TicY%$US;2eVS^Pxe4 zE$54{x>y!*s1qC*_!5&i!+5SDcu0Zg`L~T}I~P}$RVUz|>fhV*zHje8sSxJKt3#eR zqf>!Ax4QmIPRqRCG0WC7ES=#@UTj-{GZLTSf^-1s%wg<7ALr8>x71eRP$4)l@Fga3 zhVfiQ@Q?z}^KTo|b}p_gt4_ebydPWj@%@*Uv@icFZvfOYLmO^&{ag7K<9E!K<|KP( zIFlFK7T~0mXN!>z0Ot@)+$A&&u+{k@tSXj89O?uI2EN23&M=;<2p&@4dH!u<+Rnw5 zWz`Az`Te(6{oDIq1`)1_kc*LXFD0CgVtY5qT^vm<#?|=TFBtQK5{`d1QKe0)i zc6HL<-qjb194O&33t?QK;%xn6BUmJ(PopWYmb>p!eVxzw`fRjVF>#mzI56-fCUJ)GTt)DZ0?+dw8-tbraKjY-;rA%b zYw~Q)x_VCFDEYY~aq6ts-w1l?GWa;(e0m%iLN-q4!kdvZWJt#1P?N8wDkAMrEZlR_dUfmjcr8PREv_yq1+s3q= zi#vc-9*f`Mk-zIx`^oX*nXaA}g0A9Zz3R5omR0GuT-aiwV(;J!Lvmdy|gB@UH? z0|Q@T5@#6CRRj+y@I3#vF>UAKu3(kN;%|85-}=;kYP@oy%`-#N8BWDJgx zzq5M0dUfmjxF=K6V9NvGEP;tDO~U~9)%kL4hFBnRsbbQB0Y^;Y4CA?q;31OT-OclF z8`E|!?h00Utop0C+E0yF4zzh{NIJu*c&BjZ*1V9z@2mxrp?h_*&n8mm09zg)og0i} zNoE)Z`|5nTHb6(E#H;6-o*SSe zZZT-DLCvGrSLpoC5_CiL>O|+8SX~6RJb+)X`no*li?vx}`NUy9;K0C_n8X>za}~iu z3OvuhZA{xa-^sR{Eu&N%dl9c5XB>L^X&`ZnL3<5q9=*Oo=XaK%8>&|)I^V=<2yAZv zzc%%C8_pMND~aV3hxvd517BhiXBf{_1P>|jJpZ;aZRdO^+itduQgQ4hym6ZG>_9qj zOELR-HIIJpM(KA}Lkgl-Cps^pRTQ>2fM1*Xx((+`wUxx;iNkEbfq^eEi8GAnDuRa; zc%FaTn6`7ilWjL!MyWXV65cq>IQFCiw-mFVSM%uiZj^p!HKZVVb)xekT18=d1NgP6 zuiJ3GR9i_bo;b_~92oc#lQ_e8t|E9yf#>E_S(P|!jo>Jv^mNZmkuTFH{M#5GG@as@t zABXeR+v?(ch{FWIfq^eEi8GAnDuRa;c%FaTn6`7ilWjL!MyWWq#vLa)ocu~>o(**5 zK32`6&r_=W&XR_T?A3|R+ep~T0Dc|n>*H{~dRtwb4{?|vI56-fCUJ)GTt)DZ0?+er z8`E~qce3qf%P1Ac*0|#!hofKVs;2`TyN6Zt=X);mCNS1{F8j*Nf+ z2!S_K=Dkv{j>1ehJ1{$NxeoLmN$3FRh!clOM>qtTV0K`3;F29U`IwReAOKZ5!cmy` zEMtg;fls(#GW0Gw3IYf~>4*aaxS&plCbosDtX8$tdQdVz0Ln@nAixE6IyA8@RAsfQ zkJdk5b-1Y1O!0972XL*|f1i+wTZC8DQ8GXPDv>xqfD7t$XkuHa%4$^~ZU3eUsdzvD zngwxy0Hh zMMpsZ0VpeRfB+ZN>CnWsP?goHc3KZg1_(e|i30?P=c^7Em6|C& zPT&CM`HwC04G9RKYGoS23~)i64oz$eRavd-qwU{RAr%h@K(inY5P)<9H_T98RY%DH z0Z@`zESLej&JV&>IW2NYe!bU!Dr~~pf!TpwJ3z-7aaTDq0sq;I;q1Wdz~wq{@-Za`Kme+9grhJM z&JN5DT&@ErA5(Gw1fV)cI0~bo4_;doS8JBW%2#nq{xz?B6<7N$jg_zB;IE9t0Rqrh z#l(HL@i1`>dRwHuTlRMGeCBXbQJLZ+1`dF};v)`WMmPjn0zPN{zw=`D1ii5v05(fw{&p&$C0jOR2OjsNs!>G@s{V`7gm=$ z!qu@x$0M~U-_d$%f0bi|tK*N3M`}^Ns7D#k;NSufPeaIa2Df*8bT)l8;X)2F8c2c! zvuktKqFtH60kmPq86UikGQ!odM#m$yDBsa~YJZhugsbC^jz?-yzNkkT&*0z!FwcMN zv2TdI(EA;M&rpXO;p$kUzsfPf)$vEiBef`B)T4}NaBu;b=Rfw?H^g4( z{f@wAsKbqLb*$0xNG-~Dw4T~uIT9hy9QN}YkxB$%aAA9T@VlVW5g4u!D zfh%>uoID#ZdaMh@vJKzG)hMmmp!0f3IxNP+}cjr07s4V%pDz{ltSjxGSzMI1pAB)Dpv=f7>(WM&6GMh9?o0jMtG2$CSd zRpUJWZNnxrJMb|&fTIgQbrDC91PQJh=lO3NHksLhkI?}fT>z?!ID#ZdaMd`^f7`Ih z%np2v4&dklP+i0kBte3!#(DnRhD~O6;A3z|jSux`-o4f&^EM^Zd6Bo6PLM?7-~6?7-~6?7-~6 z?7-~6?7-~6?7-~6?7-~6?7-~6)jQz7#Y;&60jL_{00BrxaKjAcRdtjM5CA2`uP@UO zW`GOobZBB*sLEnD_P+o!?W+<pC~5YOT45md3J`#l z!~p_~>BxJ+*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C*@4-C@3;dSUpcm& z*>HdvfW9gw4uLl~m;pX{JWN~znkEE_gK+~WU&U>Nfm!(~4h|bY`6>=!l#w_^B#0V?r+ zghLnrofl>RtsALD`63;F&I>bu)*;7EfYKr22!|j$fstC2?@nbQjLI>>A@ELMv=;aS z=zR$@fYxDJq#NN720-V989?hsYEiyO2cYx944`$$u@j(lh&aL_$WCCS7UjEBSqP(Y zjBp6N6ZlBA>@=krW_Dn9V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0K`3V0Pe} z?7+zPxzm?^rb4@8_C@!FS~TGY>A>8pdausD?BACUvgKzP)PcFr_uuEUr@E)K?I!#n z9hiGn@739t{rmDkw)`w(c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3c3^g3 zc3^g3c3^g3c3^g3c3^g3rw-ufssK9hh8a#*)lo7)0F-2Ya0)ZP1$8@X6z0 z;u_F2Ay6EQ8$kIgZX*oL%2#o4*Z|5`aR{S~6^9`Jyf=YzQQT&rBv8JJ`!w-X@x%cF zeDd>3^S~Mr2MDk+Uj)&B!a$%n7&n0ORoq4xn3b>M;IILdui_9!87mG$0C;Z#<)XOF zKuMr{758c4sp5$P1lTwaq(cvg0|fZw@i0jZXqpfx4#o|jd=}}I5=zo<*PV^ zQO1hH5CGnrK)EPxGf)yJU&Vczc&d2f00BPvd8K(^jfevT*qASZXh2~gP#laKK=~?e zBMi*SS8;II0LoW!2&0UOgTn-~1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O z1G59)QU_MQ4phc7IJf}l?|oqg&^q*=bR!(X0O-6h18ChyEy@?^0CZlM0klr#7~zl( zK<9-SKy^t!_gpmmrZ=|(t&0nmA22GF{Z zT9hx+0qDFi18AMfF~T7ofX)jufYzawodBgn#1Rfbb^;@{DBn}cTJ=}PGdQ>a=yit~ zK;y(? zQNE{?wd${oXK-)<(CZE}fYxDtq#NN720-V989?hsYEiyO2cYx944`!?#|VdX06H(s z09uDyb^??R5l1)#*$Ir)qI`EMYlfK}m>rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlM zm>rlMm>u}!9Z=sNPL95pruChDxlUiu^4ai|L^Q`K~ z2nawG5(fxyL7fgwYztLct*U>WR^yQo5P-502MBOMoeoWG3sqUIYW7d{#B;s<8};wa zv#KK_AOKZJ93a32bviV$EmUQ-s{VCajYmd60Ln@nAixE6IyA8@RAsfQ*+112&-M0i z)W0{+s*a3+08}AyfB+ZN>CnWsP?goH`qybS9vJ}vC@XP*02kEh(8RV-mDQ?d|5Q&r z*W14(|H;n|%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%nr;B%np1T9l&>m0QCJK zahUA}2Q$DYkB5nCK+}XkaWHNG<*T@jFfc1$#lc|%C||`Pj4~1j2=K}G3(Z6I0A_%V z`67r06b1st!B_!k0XFI(9YPTY2=K|{VUilqG$BwNj2l4tDsCeT%*t1BaM%FKS8)iV zjKl!~eDeK5^H4p28DL|+2%-Unfk1IERsdRnje1CjP{aWOeDZjhqy{ui2owk722j3= z+Xw@*@>LuhHh}U~9Kt9gaex4ye812_AAiyWzFEkI;1DF9e=8GU2P#6dl z2jc{@1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O1G59O162pqcZwri6@GX) zQj79MLCSar2NwYSy)VoFS~u&{eL>p*wC>aN4~32aYR6fDl>Z2KRuL;sBef`Bq*BH+ zIJf}leF-yw*3J5KU(hxHt@||nL!o1U+HqDOpo5YQ0N$-cAOPR`HygC6|v$pQj79MDrG!_gA0J(moNip-K6!0f>6!0f>6!0f>6!0f>6!0f>6!0f>6!0f>6z~}3L z`hswB^yM)v>Fmqwi{B0x+y@8^KIl81r-0?-VJ0|dCBPKPG8g{rJpwbMFLGC%;zN*o}-1$8rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlMm>rlM_~tsGzI#+$ z@5Gg_;w~0XB_j?HfWChv4)twtFavz@c$l~bG))K;2jd1%zKYuj1GDl~92_=)@>Lwd zC?j!z09W2GC^P~p0~W=>xB--};x@v-tb7&M>i}PXEBBxBs+wU2xN?0cN&_kj0>#0& z0hF)eHp0NHd=&?W4WN7#hcL=W93a4L*O$szodS#-;O6l*3V<2l%KfLjXeDug09URL zMQK1~L7+GoH-Pe0+(sCfm9OI9umO~>;t)m|i30?__;|5T^irWYSv+`9O95#URRUE=7BXNKLA8UOvj|pZ6W(Q^m zW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Q^mW(Pi12k0wD)ZNR$VJFa=w^z4QSqRgc zw^xV2JAvN3y*iZD>#(7N89;x}4Ksk&&H8j-&^7?A^Zg^q$?b&0PGF=K<-1c^2%~b0 za0t8;7^y}1qAX=RgM$kI?Y}d)S>Jg7sO=}W6An9pky@1RPGupC$}z$r@J?W)7Uhew zl<^D>E&${EH`GY+MruYFfN#G(n(E1|gTqc>q!#77Q&|Y3a*S{Yyb~CyMfsvEWjup} z3&6L3zCsUaz9T(F7=WGnhoUB!9he=M9he=M9he=M9he=M9he=M9he=M9he=M9he=M z9he>XmO6m15&_1(Q$!em8PD$*v;$yrvoG)%IzRxU z#9-A-MnC|BFksb8MnC`)27^^I836$h!hlsX836%I7z~sd90247Y?!^)OOg*D00kos z5MXbe4;(HE447^6{JTz|D+W-D3?Bw$1O#a1h9MmwfKg(wY9=Ef074kBY9=EffC+=a zs+o*{00?2gs+o*{0459u$_x$wasxKZUh5^v2M~aQ5eEpcx6TI+7X=2)wt4yjK*5Lu1lU{W1BZ(O17_Pi|E?40iUHIj!-oMG0RdXMVMqrEV3ZiF zn#l+VfDi_(n#l+VV8URqY9=Ef074kBY9=EffC+$1(S~nR10rV_Z&13`wnDI;ps4sv3Mv1|ynT&t{2w}jgnT&t{CJY9vW-I)!%QDU%aCLuV8iUSUSfPK2QvV<5Qh*O9LxYJo;QXIVF_g0{Qlo{g8D*`T10Gu zWCR3g<%S_0Ab?R~uxchFAOJ!buxchFAb<&j!K#^zfB*<#z^a*yfB+^82FeT$0CEF1 z%wFpy#>a9n1CR@G2(iJz46qRofqQ`u94>Om6wkDmLzrHN4IRt?u>%+e(9fa1odD{f zc)c84dL1@&Faz}W1ug)nZa1Jpk8mjLj396U(7i&A2)w$Y_&EV)fEYEn5Qh*O9LxY4 z@esHd_`u;JhfMKIdpU&Zb=c6s44~HuW&o{2eLDe4hlsu0$qpNVVFu7WfEhsRP~T2~ z(jnpqHxdMS0lIqN)kvEFaex3ZYETk~5E~rK02}cTxEJ`q;Ub4j@l1O;gz0tI(7_C# z*9m3-twVh~0ZNC6z1+zT8-Za4&^>?|K$r36^(Ga z1fc=|0rWb-44`$WZzn+M5V4m#*e~rWIz$}dMuH$Oz;$|nnm-7j z>7$|%ZkHfb03d)~Czt`W4)yH>C>nfB<@(U zC>a3evG7vMTQK+PWn(DYH!2)9cRDgY2buM^AwT8H{}0+bFBd%2SxHUh&8pnCu_ zfYzbDodBgn#1U>J2=W4S^}wrtWaT~hzpCkq$3WQu3n%OOm!!-fuK z0KHBy185!U+X+xQMC|2GcGw6EGl1>^%m7-4`gQ`84iQJVks!zm(A5L4M%o0<;NSuf zvxkUhaCJ%jW1lQs~vsBb4g=@7A(JK13)Fw6kD2QUL@ z9qQW&P&z~$;YNZWFF;ohyc%f}ID>-=K+GN@p25{6^^bkBaFI)jX1^)}g+g0Hs63UhZUvjleJi=pMifpmnHkCqU^CafBNQg1i7-J@9IzP2da;E&wrm zhXCt!&3@%meFujbtA^2bwX#XYRm9-)0}FKCJUc zD(WV+_@tp50P$1QN8zwi6Kr(gCfmo=^cnM_m7lTrU2FEiLN@^B`N!_~RAAR_;b#6V z$HT&Y%aiPyn-_&{0L=4`-SMeF*K)fVO&u-kci#*gdw_ZVX@7ho_}y>i6Q0|!tg~ro z#RQ+b19L0)zqi3k&!(Xj6MXIt%&k1nzxfsZ-VO1&xBaX>b)a{-r;fJFYz8%B!r6h@ zf!TrCf!TrCf$ypV^gaj>KzCx*Oh!O}Z#$lz-TCbOitE!#Pk;QQIR3n%yL&3SFKfCY zyBuR6(}p``^D5-JdJEcNZl4 zyshcpi!STU_XOS-27<&z z$It&P=l%5`x9<7+&$90B=YO*5?(S(tS5y~7Zn7xhpK`F@|M|5^3f+H-iCjQJ_XJ7; z=m4meGq@M{WPo^o{y-IEYr6mLZJGORS@-UPWZmuU;N#bId{ZyWiH)A zkjyrNSC;XeT$qk{)!52UVcr(S<+Soy_j32*?=Cq0it-AVjV?%r zcXwYQEZkL*n=DFre}BojJy*X4D7y0AfLyX`iT4$h1SMWZrj|tZb6;{8+_Uea$q%yw z;l?zTSm&`10`5yz5eMTnz`d)b)bRfPl#)iyz6B_{KTw+E zvI}!b`URWCf{e9J!ivW>#0J8Au@I3{>I{|c8qZGnZAucAO6D{LlIy6iTe?&(VXiL1fKrMAYI_D3LdMy znm^B{qRd->T&cVB=Y;Hnx{K}!oB~Cck>0!6TJBnR_fixQMVVMxA!0Lp>#Dl_>40w{;u#naWC&!(sPIskw8 zcb$MazxWgPFLa7@!662LFYQ=xR|U`2`Olx{F#&oD0Pg+$U%5!n)n5S=-Ak6qd(mY? zdni+NeSz0H`Z6_n2c7-!?oX@_$Nnbjvz7bi27k}dZLZ^x@>SK`X%kNmhT@}1F z=j$cpFB2$l0Sfp2S1vMe(Ir!5YrlU5rvSxEkoFLa{oLIbIB`64R(&1ONH6~2ellfp zv>YUH_bg;x#5o6B*aHcjphKHn5v>XphGYZ;P!5z=naLLrKsnqlp04J6_FaAf@Pye% z5aZK}zoS#mpIK_^NI^2k8#Z%Sz2A%d_v9A6oj9hR@jPd;fQUHz@vGGmj zevZFQE%Bb4%-&{R{GB4`|FqH=BRnEVod587o@bDM z5Ww7jQkjvnUjdNrX>B&7OOW;ujQ!kQ^96wOtv~#`f?R0(;vc#9_up)_D1m}R4o?Tc zq3ANwHJj*J>h=*JfKg(wY9=Ef074kBY9=EffC+=as+o*{00?2gs(ICD!kj$F-}SO% z_2J)TcUJr2A35*z4~i?C=MlsY{PjY@$XxmHS)Bjyd7cXMFuVm|?w?d>;>=e7q2CSM_jV8>=gLm--0H4&DI{(Zcg*XsX{mNA0f(1cxNnc5s zxvSpa)cN_y{Zs3P>{&o8zpoAx@PYL34rEIf*l5w4$(%qU4l>nfB?ELFaw~HP4JUY(9*){ zLfZv)7Ni5voDcE`G#(%fR z9Qcyxr8CD``CR1s_m>s9k$vYB%^}fOD=jr`A@BD~CCag$i4WWx5e{ZuzVqJ`WG?T6 zGR~;=&sv*hY?QOP*5MlAb_qfS00QW~zzl#&Ho?Dzg5KJ}>OwzakO0yF==z-eM@Rxd zr2OIEv59+PC3G45@^{u{=9Igb>`#B!0UkpwTt&osZdUhW2nLS60s!a0Q%bq`o}0yP zzGfH(cX$74Nct!RTS)YRVe(DsH*ZefbSK3!-yj6;*@jEHd<#!?c~I`rr;IbwbG8}A zM>!jBsSyqpjRYZIKmgqrm;q49CTJGedC*ro@OPoB6+Wq!YP;sJ`0($-C(i8FtV{PF zelNPp)HQ4;`@^3(F9JM=9PXjIdglAD5T!VluK)^HemmH4&07H0MXd8yPpD4nQq!|( z_I0dzMn#`7-(}!p{JXZsxb_tyaF13Qb?LGCdxBz%K4qMdp0mv`KFZm63yg56Xe0>v z0s`p1zzl#&HbL_S&v}TW?zr{~0Hg!Z^|_FLK@vbNiw}F^zOpjw;@!RYyUTK?jDBf- zpWAqEA%}aYEf5-u935wHR-}Fy!bnJ=1#-EH@=T;yn9DEoqx1f7x%NT zi2a^YiOl6M03w%9M-&=e7}$8x?zF$w%HaQE*%IbTh4)fp-RGBkBQElcM-1I~9!~ay@pVgM}H9c1$JQpr{ddpO~ zN2fB*Xlc$>lEGol##>;7LpdWs$QKYm_XTDERI&+nS!2oz2;fWj2crl=S*PsP{NnH2 zH>b2D_^tMJAO72T&yn+w_Uh{WKR&tfOJ4yP?j@E8ZsUwV2jKnxgHZ%A=1I?~(Pi;* zhu4P9=Qf|bnur@}Zn4la4aiIWBOJnv1R-BQ0Nodu0Z_>%*ylZ7POR%2f3J!cf0t-; zN=pKF+O6q1lx?}YH&5z$1i5)=b-%iBMw0&d0K-t8& ztD+Zwmu7QH3xc0&-^W}tJKP6S&m-#jvDN+Ig0aHkCm-K(Up!^Fd{V5=kE|}*gZp1r z8#wpyLUiK35_LlVM!y>_&uXy_t|H=Peo32Vw95@SDB1{zFe5?87Z5=21(*R)$tG|v z|E@(bjQt8AaF14rbO5@(A7RhH#j$Txogu`2%8Gao9KZNyoh_tm^zX_yeE6?8_q%b9 zSvqsUWFEwPe{8t+ZwJyB047vAEQSk%CZ1!%g#Tr=xe5HSDslXjaKicjJBOtE2|pXo zPigSB_4-jXDPD01v=cxMiZ{X`%t#RO1q9H00cHSHvI(3^v&<>XpFfay;GT(a5a|Fk zS8Y_MKH+y&?2CW!wN>Efu30tXP#A|+XT%EgY&Sj3NeH$=&=sg{9w}weC76nzonPwT&EctBPm50 z;Sgpd2>Aj6=pMohfJ!!jclo7K!1t$9>$J2czf-$CMB6Ax zTl{uO}8g~yky&)nBcxO?XIySG36*RiRH2wy541jG{u2%viiGr;$!lkd~& zFJxSsLyLZ(erL`bC`fFjJ!p~R$FV-E-(TbC5!+h*sQn#&|F*&EehALs@`r!Ng7e@p z=ZJTH9Ge?Qa`?$7t2j4Y_+0eC|E9YA>A#;{d)4zTza5>l5Rfl%fB?FOFavytI`ip0 z_S3bac?%F)^c(ftL9~s&?1|j75f)uW?0Y)bzWV8Srii<{_ILQhcLrAVTPP=U`0(%8 z%!T`NtXk)dHhw%?_u{j0q@0&Oy@PB1tUAJLTAt+R7lI|B=0H)6GY4x1!TU5UV2yOe5`t2ZENkJm_W`v|m(D3^%L&SFaN zgXiiDr|%4`?#~b=bIt`<=ED6g(qr>LXn-c(SDy`L0M9;Q6oKd5`vUV}KXUe001_8( zDM;qNP|VEjcNZW3WpFCO!>2|B0a=Ix1kgQ%8Q{CrJyI6~jP0;*0V4O4k)mx5gDli{*2D z{4GG{_?y+t#d{0_?rVt&Za2{U^8Z5QQiDe?tqKA%5eEpMdk8bYcd4UK@3Eh$9p)`S zt-B;RKiA|62H8~A>%e&x&aK=>4oYp8yQ zKfm}pC9CfKjxhqqAO0)dbCjk2rNKqG7gvgn??x~kv;;A?dk zomN|4ehUy~?vcI~0GEId0`3b^6b{Bw(EjvaM=Uj{5C6^w0`eyg5P)1JhYt94kAp{Q zWagqJzXeE6zq616c^HCZ?w*91D|eeC^8Flb(iBqrQVEYahv(?U-_fMH`yKJ{Bfi8K z-F+r?-fS)R0o@nv8>@aPDa<_b4+1c^UyBf21};d3{LO0N_Cjx+fl96F!@mmx0Rf5c7Bv9BSdO05FRP!drmC{X;nl8;}c9nDUHLxV_MGXCRc? z{NK@100<}`aex52hcE+tw>t9`0NN2jcngrZ-&D(!(Fe&oUhB-2d(oZ4l|7ohbv%@c zPx6kjFa9n7RCjm3m3VN;6ziTdsq={}=%R3P>CvfbJp80N=0fS+z3t#eWNsTlD9O&VRp~b#H6B_oB=C zQ(jBI7N>-CJe2U99X9?>;LoCiOQu}SxZX0U^JZ(g59q#X-(1TRl4PdbfBd81pI3Bu zPeu1-O;==Rq5DoRlUM4^H8r}C^Nq7 zaoiQYA9`pea(DMi&A!L`5;8fG05`jS3a5^XXp7(tEJbsUdmJU zy;;E!sAXsYDOJ=F;h^U%`!=Auh{O3b= zuXQ!9s}CVJ9v)m@Yt^f>)?Dbp0d55xt@2nbKnIsYUp}h1G(FC`S&3{{qf~a=JwMXD zuZvvL^~T#VTZZOO>7$Ma2R&yQ&sE3NBw!vidYLu}*a#hfmF5x50MSxs9_jTY!-r(gH?Mn5zTx&N=Jwz%*`wS&cMMMU;ZP)t zPY~9ljAE5#oRFr+SvLcpfeVhfso)}nntsHZcEA?h1YPbNUCZM=FwbCfA z_RHfu%W~H}DE5r{bvakK2ltktnsWkok2)d}XBn@nj;Tq&M(6;nG>>2gP__6v`uk8< z{Us;E{V~MD5I+bgj6*-gIEnKvxL9|eSY4k}`U6f^(7l!jug+HcJ_Iv4B^Y$a;tO;r z+5^Y}j?P?07A;sbJcz`z7Np|7l5ccW?#A=vgSZ!hDUGAgZjQ;(eyaGa{lua%Dt|2 ze5@v*xH^;4D9-zp`ZKDd#c{TlyY?;ijI_>V3DTCKnsYFBSM_*~Bq?5XOihA2G6Div zX&%805G{3PU*nnFQ_jWrYyJDsA?GVsp62lot9eL$WyNbWEzfuHjIVt=n1VL9y!w?v z)8nk0Rh0=t?m@&b?FLkK#nlOwMseP+cOFVN!kL;wpq8PUDOJ=F;h^U%=C=Jx6h{vxLYGk1sr^LX@{qoLy9;0Eg zWzy3i*2#}#+zG1eimTHpjpDo?&miJDS{y6g2xqc{KrKTxQz})D=SY&`RmapMxFaJV zfR*ME%mC3+XZCeB+`n3G>Hn*k^PP78+odEAd<8(9gU_Kb2k18mVy#*C5Qj+@2e01v znsp?2@CJVomtf`=kNF{bn#W^wHXIGi3=6O6an{YMB(X@%*DA(GD!bz9OiH6T@7FsI zr5oW)%^^_BP|cJo>WFaAbC&U3bxchHHbMtrrFjH1K(y4EeeQ<)vEdf~zsfn^Y4?Ap zK4h=(ivtyx|Mj?_!EX}8TC-n1=blB)4|w@6oU^~eDaPPO_#PlC;JEto?{erXu5@G5 z^fG9Y-(Jq-3QjFUHB;EABf>$?S;lkKF*OO; z2pxcx<`K*Q(Nbsjxogh2kGM$WzkfbOBxg!*3b z@^9w$0H@-U5qysj6>wPm)xY3ggcemZ%tlR*vu<^Aik^F(dAav^Eo)SE#nlm%Msc;@ zspB%wjIidJaCgm_TtVG3RI`cal|+ObHRFA&V`>tx5jp@X%_EorqQ%zYM{PB~I;qvY z1ZTs24b-_0e^S3fjOP%%3isy8%<)@*%$1YQ^LW6s0?p&K_=HsEq38}p26Q|E7I4S` z3T08L-0+$nXXp7hZLSA-waUG$`4(ql1#!#J0+w)T?-Jpl_qB}Ys$*&run{@{E6pRA z0ivbO>~mM#KVE6L^gjJ9S*< znNeBu%=o+oXH;bhwBeEvTy}K#$Ptk^%XnRNOc5*qG6DivX&%805G{3PG3WnyCF}A( zA*$!H6q$XV zFqIdPrpMVG&VQa!xtFrHkeYzv>NrZHIPX{L%BYUEJkB2FuHzMZMpeu}8!idKWk+|9 z91)4LjMr7i6u|;mHIgss00C4jzK*63KYJxt_!M0Y_x>f3XWo8Ky(#XGp_Vds=-yYs zOnwVcxK}R;m#w+sUW-q1cvqhj4n?+(4mnDZ+0O~Ka;P*t&brk>jx#}?QMvaAQrQ(( zM^qZcdB0LuMs>91arP*89k19ks$vG(a7hR*JGy)1h)A4eyskQ?2o}Jqk$g!92%u{5 zb<}YMnrwZMXYLhF z$>%}*obUkACOD)p5FYK%328?_;((^d*@eKIdJCrs(QV74*wq9SS3Dw88pU}v6_#Kf zZF#JGdpQ#|IJFGbOktyr2nRi98P8S6)FfCpk}m}T1W>j3I_heE3_?_xKm1o!KgDq8 zKmNOQZU1)QG1ONn-M0Xlc|e)%JiN)w?IC+br&m9GjX3L$S<-VqX5T12_~D^A`t0J; z^f>GGl*o1x73af`{q8Ee;_8S>qd4!!l|@`fTOKRlUd}`fPAx+hj#&gv% zH3`;@z_%E$@H$cl%t;KPhwK%7=fedr7iO-=3+jZLWBDQ>k_yk+j@viV$oMaqtiZNq0zs z&f6jR2Kg}z=ncR^<(eL6-70pLeUdEBiXXMR%C5NDTWJ(m`|UC2Sw(F3OXf_jK(-9k zOktyr2nRi98P8S6)FfCpk}m}T1Q50O7k_tgHUEjS#L2cQ0K4;$Z9w)_*I=afPxY}E3 z6j%H0G3HrCTs$+eN56({lIyYf~w%Rw<3*YQIa3%bYT* zG6mXjNeC`Gx_jh^NStN7t~#a&7Qm{Jd`SlgP}P!O{2fiz{3|E+ocZu?bT3(U@%yvb zS1GAKxOAC$B);ZegR3(>q!{k@;D$kWC|oZ(1Yf5t;}8y4a8>8?o@mzIG zO@ehJ`BD%-0Ir2z{7p4~Ag`laXpWm<`_dnOA}7hb5hSrl>PC za|i+xGr54H5RoIoLC;ynbJa0LumDz#2f(5W@Bwx}20>qFmvwiXBYW^u{ zn-Bl2dnvk$qvwSTuWb)&M(wRv7mh#u`{?2S6MpdM^y}c)^Jp_4LzuYIO+-zPvt6gH zsb)9AcEFG3`Ok}Ld*fSf8Lz93sa=;R72Um57CL!I{Eo%8BffiykNF6FDzUusylz)wg|%2p6{wYONe z5U{6gVm}azr@jIm>viI;IE~z^ajaNe2iJLpCq|T+PoV?Nfn3sQL@2 z%Y%EJ;uXbY&~{bstylMYUx#$Wq2_wW&heN_$oHdnjw{`S(eyaGl&>k^GDMzwxi5CW z6d>2=y>l&{?&?>Vpvju146d{w)klPbUS}E4RmT*;0$4SYFX;dQY{)s99he=M9he=M z9he>XemYRU^Dyr{=)D3U9OAo{&?(PR%Ywv@0mzu(`=kH=`Ira)5B$H`2mN09P=|9w zNW~9F;rm`Yy0P?R1T=i`QJv*ZsG2iDQ$Vp_V+>`H^^U@4UHUp6+;Nu^a=_@3N4POL1OP`v&gOdQ;wHi1ST@$9JlEsRwx_%KbX$KWx70A?(rWq5d-M=q6YHYR10L=iB5xAzqj7LJsBIaA671j4yaS59{%A z_oZDUzaDY5oEZ+8iG$ePLoVqf|3I_E!3ym|<8t{&Z8@8%wS#wH_*QvOir3-0m_zwiTwDT7+n@NdCjM|YYZTTaXu{dg4px@~ z##njOb*CLqaK+Ue-deY8yf4R$p!YW|Qj4Xyl-T%2t}3BQR=>@0X5*lHP}V4;E1mx^ zb$)Q`{wTf^v@hb}pjLmIa}?)49q2=zRJy0$mRs^Y7KV{ngJ=$Qv$7A5#0$Pv-jm{W z_%7y9z7BT87%oe^PBftv}~l01_Zso zX^~ni#ihi?H*!@8RkC`D&TJiOK?oZA%8yq%|MAH&G$1~>v#(zM&5L+Er>@#V>XN%1 z+|ceBdVV-MBVENSqJy&qUy4^WVhOS_l$-~>_mOzX*W^7h-lp$L4&`e&*8|^Yv?88) zPq`}`?Rt&8A#&;Dr#Aprmjik=h0pZzz1r>7Q2JE|%4(L4&*f=@VDP3Esl`$pvXXD& zsuFCnW`fRKIW!U%H};huuXX;zZ(1WP2uxFJ&&k!ngh?Q-9JZX z4!#txL~d6Aw$+t`ssbbNlCQ~oV!Tb?l^n{~aBix;)tGTQ^qxyk1@#(vL*&xIPfr6@ zmjik=h0pZjz1iyx(YoH3H}a<#>0LI#5P@2x7E5u+OumV$>NCmO={a-dP=7-3VPjwU z@jB=KQJ(cMXL^VeKj!k%DfPoc>PbA|)jX2UOW+$~WT$jLcryATUXg@4%Fv?KrL)pJ z8i@yA$_p+h;5+F+6Q_K^DcuI=bHrw*Y;KT_b$%wO*2t@4@ADh~ZkNaHc+!tetN#8wVGrmBqgDWB>X0UZFOCSZRpn zVFKZ19+uwghbhxTjCaVs>5%&2F`Ci=w|k~MIn-|evQxSreWd7%ctz6ozu4?YJK|U1 zNIdw`o&l$Fk`89#lrK1?+u(eT*vyp84OY*!7pLB@K2PN0vp$d4@9jQ!dP$l%&*h!G z;lp6bNN=(UCV#XI&85JKBQt!#nG!gem|8F!2Op*p#=i37bP%|$V*(COxL&SOYxL$ zl?w&qU-|J$=l@N7*%v0v4{73uSYJM(VRVePV;I=;pLIcT z2!iyG;-%X$u*o;YD~eD@8P-UxXst2hk$B{byu_J2X#gD=!6{$jlx~#^Iea&f%n#DZ zPEP}Njl4acf4uxB2dm2gvp$d4?)4srcF~X&_vNiTd;pe=^d6gFc1vxe=4Q+GD+pSi zT}%ljnVeQI8;2OB634#s<8{t|Xh`)C=370)qL02noX}WpIOT_k&7Ogde50&vPj9Mx zL%itXA445w*dn#0tqROYJmZ`48V*?qsQOlXO+2Mr;bKaAS-8zy%N^O`|_MYu(*e|ndT;&;)+{9spW;G5=t^TsbnUH z9L37hLeQ=M{QHQ(k$@kEOaOL0gWf5^?5l?`-RdF4iboX(G{!i3(=o@3_wWcE`6hE@ z(&I6bxxNsu2%6p(Q$(Tonp_@_#52Arui;c$Ro{xQiKlccTukrVi9$ke!`aTS@43&h z>+#WX*$fA(%K@`KkC*Rue^)8_}LduyH-LMZoeq?iAg zD*OoG5W~563Ao!a`{etv*fQnsheX5oz}BYqY>YS*-_n{HiC6iCycMUCs`?gvt9VMc z$fe}IpQs}?*Bx%WzN62n?@ekITRIQ`mn$LoME>9T*N$A>E zH8&U)M_v(zmJ>@QVc3NhBSAXrmpuPr{`}z9{jqsk$VDIMCw}>?AMx_< zwq2RWA^YUJ^UYVL{Qa0n__}!IsykmSKwT@C)g$pL-;lTBavHv?`WAhwcuKd(rKG-} z$RC^QPB&iP(dX3nD2EviR+j@ta~>_z4j4fb_3gcGt}{Oh&UnQ^{VahJ zaRcU%u;8oAu#F(cYDq{|+MEPKz4y9?hV znajI6A}jdl;w^6_5+r)Fax)xR6l5ZVP=@vQkHv_E4%N_m<2wWDuzx#h* z@}&$tat>#o{~vs>w*S=MZGP9QA6%mLdanDx5@n<}*aX?j9d!}({|WB@{$Jzg2aoUn z&u}OqCwu=taw6Hh^o_<3|Ht*fUwDfTVEZ3ehjfu}k5LzbcrJkW5F{QZ5N_sS>8#V! zfq#B{EJ1tE|FO6fbj-;Mb;y(}9Pm)=^;s$wLVmwdjbt%Uof{)a1Eup^Q;Q>NPWgIK zU7yma97?ws4-L!!J$)$$99$NKv%cf`Upv>P-7C)%I5)-t>+{^uIFKt}9*T2)C(i$) zc%G=wKkCbQ7tVh+FMMfnch&RXG|drj{<+OOc-5QVuJlkmd*A;bic3L;u^T=f1-n3HKOvA&TcHevtSv02vdk4tji<3Qge$_#HaG|2x9NfMckHigfYv z#q&KFubx4@K*A?X=uBPk6)|gSF-MhG=~ND-3prK1RE%#Z>WbZ6{r=Ax-SM|?00Ng+ zh66?p;alL3{|_y_#4O2iaFka>Dr{n3l7iml}P)vM!X;QyZKprX|`%)*!+sCyi*A!nVd&4t1kf91;-x1_CNO> z=1925SXdZ(n1H_p3qaJ>k70vV4`KS%L-afNXonob57JpZ2Iw-z`!P%EJ9HE;y1+LK zk)sZ|zao`TS+9KH?{hdB~hi!k&RCg5+OU;tY_MvwJrcOf2|uK=R_=!2Cv z0AY|0>LCnL`otjzfvgMEF*v0MJKrPu^uaKZWhqwl3>|!z!v_m=DhF{&2aa-pBVU+_ zgDx0-KcVu2bh6XafbDZMdXC-z1P80j0ew6QpXqR~wzRRW4wU0_c}L&+)<$}BO|bQo z3^g+Cdpr$9y=`xb5lX0%)$=fBTXQYNxdyQP&wYnE5?G5c^ra=>Z=q6vRCjX=v!e&s zgY}nxyaDi`aZZmhk>Q$Ljz~IhI(RXzYQ6{Myvl_+EGcM_Pu7v|Qux4=PUQfnbi^SC zaqxu^TeaG(v5k5!YN5ZQnFuSFkI)iS-rtz}m=7SoTHJT8WBaZ0@m$9o$mKHRS7ZZ`7a`OK4M z;Z+bK2Djhn3{$7bWYSkBnRj^+zNH1YCt+|QigX?aV zaUpaI6Pn9-yti)Fk&Wi;jl=8Nj2%yo>*kJ$o9%AgY#%u`8`s5h&MtN|Uzi(B>#~}s z|NKzUIp>mJV!80USUEu@;j&$}*4xQldhI;Y+9bHHuj#k*`_AU2b<^C=x)-Uq4f!nA z%@0eM?W{SDp}VfR8B#OWvagNqJmu4He*I71>r2PpUd3fz%3UGerW!Ag1Nk6sOZ`8r z+ca*-7gBDHH|?VW9ceVDuOD7d<`RBt+%UIK+-wcwX8TC7*|;H=vJSDM`4+~yuJy0~ zK%D**Xr0ST{v1n{Kg8m+a48A>Uc$3oC#ysEn?*B)eln%YU ziosjVR4%s|>7M>mL2qJl*m$`tE4!6k`F69uw2yJ}?sj+n+L7s4*1%KahPi8&p*4(~ z?IXoz-Kv9+gW!b8Mh;!#o7d63C(_+o_ZWZcV2VtZl*MKT?@QDPx&N-^HjS_ zD8%9=Z8nX#T{}`^o-ZBpaFvdHxayz&m&$Md`j5M~#rx4M?`-ELjvMpE)z(AGjmu7s zc~^G=*CFXx*3G-`0L;d1Vr`zLy%{%KONh5I#x1Y=_|0$z%fxgnWh34`F>W)r&C|3u<7WE^vDvsyEact9 zj^^v@zdV~~(oo%vbL}j#(7lN*Gs}_oTTArLUAjy;e%mLyuCMC1DX#H8lrQClb=-XJ z!~Z42a)%hm6ZjCT6GG$qd2eU=IL7X{=Kc`ZbhYejqnwj`=F-ji^*?*9A0M~U!LL`_ zr|U~7;dS{-D+D&NaZKC}gD&l1*w}cu{vs`E$bRA zjlY^hyfzOMO|uL=bo&tvKKh}vSTo|U&$~NYj$`bOYrbby%f2>x|KeV|mq55)R+Khl zGdJQ*>Zy%+zOm5b8soatoo&6^K3(5J4KKT1uR~xHTYJ}57<7y4>MoP@-PS1wTaz-J zJeA&g0gw*2-_c+9&v8F+I`^X=6SijgX0h$l|9s7}xu4;(?)dMxxnBMbJ!Pvt)ra9Z z7P`xX8cSUz;WDmlEw@j7;kTbUZiT|Bp|4{-)}GM#x{eE0Uu##>tg$1Fa=&lK&SFjf z*U$SqOUE&G$2Fg5)htDm009C72oU%$3Vb%NXX5(D?N^Q|zot7QRe60_Q=XD^InbQ* z_z~7c9G_w&fq%8Y&(Ck|`o1;qgS7Fd+Gp0u&zdYZ*7s?iTGM85O}y#38z@UTYYBIE zq=UdeByj!w-tOu9_-_Yo=h1elli{Vs(dpf|YfY9L*RK2Zjhgs)in5fmmT-K}MiO|B zz~%GTmo@X3Uq}8OeH}ky4a@G=#@7YKKZxXnD{#Y~r zIPx*Nb@-iDsgpD6fBXl#+ns$rAHL;f9-o;foVSGIyEc-*dj!7y`uve!0et!K`Rm&W zdpWiK!}^-bDeGGfu_Qizz{@<*C2P8%f|z0-+9meGg^-?R#jPQU2?D_NT14)M>1~ zbJmxal@lLdWUBvDmvDUFMiO|Fz_(kvV~4BTDdm51{r_?L`ikR{xNJSXjZ7yIJ>HoS z1pX<3%QgIK*w5=(?`xHZlT-fwj|L~NZ)J^%*R4mb6Nw(}NriNO6DxmOQI>VJ34 zZs*AI{QAZWwh0iJ3+%l{j%7M~J$$5mcgO5@_Un9QeSHR71PI&_C~pqVyYIYZc%*!H z$Lw}?OWA!7FQxH*T?b}^z;S_4UgaML=aZ)YWO$@}cgO5@whPJSf9G%bC5;DwR!p0~ zae?l`LGx*oj|?3t-`z30o$WGid1M<~+qv_$MzjbJI4sbAICwto%l0F~N6L41%x-71 zj7u5V#F}R5y)-dR0t5~U3||hK&)R-vsMOt^m*pwV?wH-qW);^mu!%Lz(tB-Ungj?O z6d1l7JfF4w%5bT>x7YuJ@9>s&{G2ablmLM}fnz_mu^CnB?(OyeU`G$S_l{g20Ro%K zo{}6IHP+&|-DTeGY(_p9_n^B;q)C9lM&QNQ|C>A^FxKL@-ObtUY({$h-(YAGI9cGu z_y1g#qXIAf``;{msm1c1z_{Jb+3jpbUdq5G z)-+4+rHN@0AaGRRwa@>|vX@#swf^gm1I_kW|K_;OdqoKl*cW*1^S`s4@2P*E|D9Qj z5jaU86j=Ikpn1AG-~X8neoy`XksknNd(6-OKK?()5hZXy;H>Z0zN!B6j|0sQB<^$e z{p9&*Q33?E0_Xg`#=Gi&`Ej7_s$=Bd?~}|&ixMEPD-a(FUM#Ay{&)BDYUhjVdwfXu z9|K=qs(A$dV*<;c0x!O({s-#+<@G%}q`kj^{$uCRatXXBP~HlB;!VG1Pw%h)@^1lg zPtU#Sm)3TF%In*Qj|gCl=?{kP@+iJ{b3E_qFaIH?GGsm8sJyHSzI$ zMiThl1ct8yU!MJsf=i9PtN!a(fy<7b_|c!;DK+u$c19f!f#U+hk9_>=>c7NPqj->7< zG$-|Ha^Bf2R&5&HG%kusO;${N$_6pmV4lrsE@sehr_sPiQBQW}O)UK-ZLY00$6757 zv8kn$dy2!5v*N&l3X;|}XcSq3l>(!l>@1y1!tf{qatdP0($+o2Vdz?++3BFbPP_wo6Qq6BkWtbErygJbl zp*7b_x4!pKK9pV+4LcJW%8%{1Ups8`VEI(9GJFjJ!kTj2Kwol(Z$&+g89u#nn4$1f z8xwK<`BXOF*Iv^6IZg52x>U^?K{Y8CY!F-iK8}U^5vn;V&!YujkaZL;5L5M2XCTR& z(Rd%ChQu(_=0g@tU5BstQq3yOBoxRbq>N?| ziiXRwlBI2-ol)u2wnRM_7`7=(w~eVgh#*gaXiSa!z6wia!KKX0MXfR#t}rYSN(ZiB zbHuSFqT$jr2EI;v&}M+GNCj(|S?PRsR?8UxVZdY#GT|iwhEBVpk(3Blyho4s_+nk_ zv=S3(RT62sjHqYU*{y3pu1G?*04E^_&t_?l2|1jAoC4VF*=C;Ix(4K`1mqOJNyvfI zE}d^B0M_zI*0S4#Jj8&U23SF+jkC4H4hQ?7zOLKvq6Xfufj13p0xyM&bAy8iMW2;- zXdWA#J6jIr?Uj$N5wa{A9+y3BUPnWM8v=a13rx=$h}ZkH)7vQn-O8@tY4~HqpWIm2 z@9b}`BkeX(K^qlmj$-twF=qVC8Cv{t#^1H{A<2#Uw%TgWaK~i0Bi5d|3&8#VYB7$(>o89x&xbl z(ppv=W*oWs`{LBO+2PaIEgT*Vf1#o+1w?#be&Xx+C00GSLJJig7KxiV_(Bre%<+zD z%e7}x>y4reNcD{eLLQ|f{L0kQCw>d7P(d^9H$6CdIX%t&3&UY{TfWL1E zYXXUKy5TRv1rtOUjsseHWSJGFS*g_h)(G_tJ_A;Vo({XzI_flV~EOVsimJ1y;ELk+WqRn;T`IW&i55t?K^*9KnW3?DXB@i`7XqxK_lSfc(Y!{Vr%pb z2TGj@WzeO|rB8qAKq2)?F)G&al6|g5Z5`SI>fNaY>6O)@Sg$7FBA zBodgplpg+*aM04d>`Fs(!*Tf1K~Yu8L=yTpNEz>SwDmNjvr z2bUq(j5zRW9c;}~`e?l$^L zjcjIHX8b^DIqe?gzWiNLZ3KIeu85>S^|z&cRjsFzhL4p9xAf80l@JQMZ~48r#Dx0m zRicEm)~>nU(iC4amNINF4y&NCv&YdlUK1lYvs2_w(L#@WUw9|5vt@`@>P@|u$Yka{ zGlkOTSCOt4j7M$bEW!$>E874F$qa)^_6?+C8zpCdVdgnnTO%|$em@nw7 zm&iPY2!)aduhleD$EsaaJzbl~Qth-w$AYMq9(wM1EC}r_?k>v$r?v(o6%Kx|_S2zT zMY&$tSYz{lqEZoit9z{&$H9z-p~1h#N)6TpzOwTc;nNrQTv%czM#~FeQ{*7}=w~9| zG_0Wu<`QVNzLvPi$x&YEr~3{d-MbsKDuIs<3${goop|%F#kgEJT9zbj$h7QRMrhFX zAB%=uNJmimd`3<+5SX0W*!Ro1IB9$AQ-b2K#Wit(ALUSVlkn1`>%_zoBgZCKaGFJa z^U;T!9Zn`-mUd-9XqMxS6`TK`Vx-=E$Q2ET?D*Ci z-}wWORsRf`7YTlGFU~1ybGNeEAH)3@7sz6SOij-esn8T}m{FtcWg2U|w zVXg2OaoFHxy{q4*Ixjvk)@xHF>B(P+5Xa0DPe8iS3}6oB%7S0&5_7XdzUJ)QMZacnBvgnULva?o z`}oJC6d$s6sl36JNA)%5pdtA3>u|ufZb=)S7E@7-x}O%qyk-<7uyZIWAFFPx>c`1d zwo4BPKUnWBPKJ$mQz~IbnO!Q<}+dhzOvyN@dbLV6~TYG4}E&Uv!am8 z&B0bNp5{D@!0h6$UyzAXeE_GuwQxFp_t|&!~^@C?}#@{#nC)%S>mY&hK9L+pU87pC( zS54PGU!o#7g>L>ny7y(fpW3#IX?|3Y46@v-Z`>~`^Aq>s4DQBpnUxunu|6fdgf)}c z(`L!ApRVF+Z&w0?@pn!REI59;yeJxW(KmJ==W*jjYhJcXYTF^M4H>SV{v+l8p2l$* zEf9*IYL&d<6xFCp?!>t7agUH8)nUczUl12+r)$$%doH2!-J-E1bqt{v+)Cfvts1TB zo+Z?5mx2m@J7^0TA@rEOXxQjf)!3o=Q&C7CV!LO6IbGALFYB2>t^bb9zV{9}Y(nkv zOp?%KS_tIipg!PbeNW9Z0*DEKeyACko6eU-s>YekQg$tTGz?O!(nvYDNTB`>jn(fF z6E(9*Qh6X_|FBGJqaAn1^SNiT02&u?@@rp4|EXpnL9LD9=Cz||Ec~XJt#Kn0FcVOptynYi72{L=Oeq7)m~kkq9c$N%Fs=EPCTT}V zR_g6db91{C(hxT|s%-VMpS+G|7a7=S$n9E*>6?EP7in}uCk761+_deh&Mw({j|kF% zJ@#rf>nJj`X1uHR#!GknL7an@rmE+9c1}Cpvle--mC&3Z+ERDY5#DGI<3kq7$Hmn9 zv~W`M71EW0gtf*^a~9iq1Lj=@%&HRAm88G~2&5}tF(+-yLDAfxn+(A{0UPdVr-C=2 z0i?FG8DvDL`X>?tjY1}p2jkn>HIW3y^S)|<-ur$x-{5`WV8ax`Fl8D|{jnIOrZ6=2 zEiuGnbV?QBIG+AXeNXuF~MR-1B5H(=-8M z9?Aei#fce=f(WnW$v}~jRSmy|J)g!F2u`c;zK4O7N?T_%!Ugc_lfZd)N1ab-rLpk) zVm{|iYxK%#z#J13#>5vR&x|zuaY0msi>%Jw&?5tKSsP+b?R4*9v64Ej5e52<-MnAy zZ?2s>`!=eS3j86MM5cON&za5B3G?*x87KTN5*77a)(hDGcFiqEmwTxW|LZ;^>8Brw zn=oead|1V!jNs{p{(MG&V_@?v0mF>~8oNrHzAe1lVOSp?b_qo8M6tDV&hAHEHRV=f zV$}P06!3Q4(-`1O4;4Kn9n}4Zqk!h05CJ~*yUkV>yXS-$W20x+NMA`rc|pn>PDG|B zrScZN@XTy+5xxB>>iFoB5c}qTAT|3=iqX`4nRIci^HH)l_O+8@rj5Vnu|se>eet9i zg>7q69#W($3kRi6E*N&d!}riJ90)Bqfro*-jeh*2I9iQyoF<2YGX^up=_s|I6k}s} zWax-S&;CgI`+ex|b4fH@XVjP@Es71H@7O_cjF83j#!qqO6>_X1i@lMeigT%1plX+H z$vQTl0!$_QIix7M{B;cS)mAApgH=(k%MqZDowA0B2@rz;p+%$=P6B zD2tsD79R)aFS+D7X`I=>>M$hR@`>T16!7r>b|ZG1G?aF4W6;t^0nPIO5y05V-hfFb zj2tk{?U8Km$7Qfk!D489{)o}lj1;B7)J5c8fnydvYZS?!vBW$Olzl& zbF?Z;avts6ikPpfbhsFjpq5GUf!N<%@>kdyv;b_7-`?16i&*5Rvqz{wp8}eE>)W(~ zdshX1UsnkXfBYq`+?99^1cLGmCupveb3wa;oq~4#QkG?iV*69^O5yzHdi};4avq2D z{|U=-Xkfz}4bR4yvEiWhUr8s``5lD}OzLa9-TaVEic8eMCG=_x>~Fdf0UW#%=QTg- zE{cYYCos5RPUqNH4&mg}S&xsac~Ud#DzRUp_F-}Y)enc-Uw;muiihK$knFG^4T_-~ zLpdmG=X0_=4FzVv71X_hXYs4G$^Gmj`3MQ6AVhgbA@%r3SxQG9apdVgK2(UgUXdkT z(2kuj*EoSPz_9csy#pvj($a-s9b5>7T;H2i2tyfo3m%|(;~f(Q+`l&+5P3o!qwqYL z4vqoXS6=|wqOtNq1|%$`gF(adb6iHK{RLT(39fw5X-IW6JbHjximRCyH8yKTD-@vo zzXIU@*2^GkGhbgfzd)~QbQ>7AzXbQ4G6fIQ0k@N8jdG-=n(>@!+9zucS~d?~49>$! zb1>92LJe9hr>ITu;xOArKfQZCe*W4X0C7!_lhi|Xri(ji%A*5F zR$rqXdvFN$_7x6n&c`H$o0B*8r;AgkayutVmr;Z;INA@P|2L5zrrnPebL0253WNG@ zIxqoi7z;-N^*mbjt@JFLpzfsB8Tevw29_y^+mqDGN(Y>2QrBk3#Aw)?HU{0clxO9gWCyY44j7eTsYU}Q9-qoS+|FD7&^snb z$2FhqJcJ|AEOPJjpo#fsCm>hX13|hyPyXQ5I@kAe;Z=9XVXD@9`P7 zg^iSL@qZIw(OAn2u2?K{AQP6-{`LHX!)nk=KNrCxk*P0)(!NapiBv7NIM5i^M2mJV`$gysE%A3(j>D2sUmc&bMZ#nGKlMKy@JU*m&-|moc-~m8+qhvfH|R>Hy6km^y>R^tf?A@)oqw5ZrkIUdM4h~ z4=R-$Ec=D}o{C0jb(E(iq%~-MX0IKUHArKMeI6(HGG;%9**jt_3&EbH^q>AQXmAXC zPP|&17Y&!w?#tv?XwNZOXOfb|rb8%BoVYih5Y=W&wlGnD2dI5K*BX-TdHEvReFXxY zyJP+NrXgqx+*eA~7K~$>GS+gXL34nzujVNj-Df;Za4~W;w!&z%xrx9+F8#TtVcho= z&?|qJ8>}*iaDjv`E$Q z)iWLuYiVKc?WeoTHO^ELPm(B8RZ>%@ELTM6$aS*rp_d(ad{pHwyGPl~BEnXSOvd9U zNLrw>R8bkEqs5kf&_`-+puInl9^UDKOEI_%Q=L5Zx}|F6&t+70+V;2%#O_)u>#;{2 zux+=_t_FVEXgaH}+1UOmyKTqLJwm^AIwpg#zul<&DQubVw8?nxA(ihy`x$=D4U?FR z=dY$IlNmA<$>*9zsO=l|I;Gmbou3Pdv<(}z*j=|Pj)U&m?B*~x`FxqOVl~f)LWVn@ z?5PE<7u*S2Fk%`hrqy`@<1(hjZ)#^0jE1kXS_2m?y;hb)!*2c<|DonRCvCY?e%jyu zHRZs5uxIgZ;yF;rFf<-BReSS5pQkL*dGU7n0$T(sndk_`zWr@E4rU*4bFMr(adeaU zI3gOodC7?n?Y|69*?-zI2FiIcWGbhRYM>mw=DjjAF@0OPc;bt6b`vyM zp6aO$wPSrV?^%E01}IJhifwDy==Uq5_dk5u<~i57Ej;wIKcDH~GavZO2mVEUV6Ef- E0UUB=5dZ)H diff --git a/src/font/sprite/underline.zig b/src/font/sprite/underline.zig index 29bf7b251..cd580d141 100644 --- a/src/font/sprite/underline.zig +++ b/src/font/sprite/underline.zig @@ -70,8 +70,8 @@ fn drawSingle(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset { canvas.rect(.{ .x = 0, .y = 0, - .width = width, - .height = thickness, + .width = @floatFromInt(width), + .height = @floatFromInt(thickness), }, .on); const offset_y: i32 = 0; @@ -91,15 +91,15 @@ fn drawDouble(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset { canvas.rect(.{ .x = 0, .y = 0, - .width = width, - .height = thickness, + .width = @floatFromInt(width), + .height = @floatFromInt(thickness), }, .on); canvas.rect(.{ .x = 0, - .y = @intCast(thickness + gap), - .width = width, - .height = thickness, + .y = @floatFromInt(thickness * 2), + .width = @floatFromInt(width), + .height = @floatFromInt(thickness), }, .on); const offset_y: i32 = -@as(i32, @intCast(thickness)); @@ -121,10 +121,10 @@ fn drawDotted(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset { const x = @min(i * (dot_width + gap_width), width - 1); const rect_width = @min(width - x, dot_width); canvas.rect(.{ - .x = @intCast(x), + .x = @floatFromInt(x), .y = 0, - .width = rect_width, - .height = thickness, + .width = @floatFromInt(rect_width), + .height = @floatFromInt(thickness), }, .on); } @@ -146,10 +146,10 @@ fn drawDashed(alloc: Allocator, width: u32, thickness: u32) !CanvasAndOffset { const x = @min(i * dash_width, width - 1); const rect_width = @min(width - x, dash_width); canvas.rect(.{ - .x = @intCast(x), + .x = @floatFromInt(x), .y = 0, - .width = rect_width, - .height = thickness, + .width = @floatFromInt(rect_width), + .height = @floatFromInt(thickness), }, .on); } diff --git a/vendor/pixman/.editorconfig b/vendor/pixman/.editorconfig deleted file mode 100644 index b98bc65a7..000000000 --- a/vendor/pixman/.editorconfig +++ /dev/null @@ -1,14 +0,0 @@ -# To use this config on you editor, follow the instructions at: -# http://editorconfig.org - -root = true - -[*] -tab_width = 8 - -[Makefile.*] -indent_style = tab - -[meson.build,meson_options.txt] -indent_style = space -indent_size = 2 diff --git a/vendor/pixman/.gitignore b/vendor/pixman/.gitignore deleted file mode 100644 index 046b161bc..000000000 --- a/vendor/pixman/.gitignore +++ /dev/null @@ -1,56 +0,0 @@ -Makefile -Makefile.in -.deps -.libs -.msg -*.pc -*.lo -*.la -*.a -*.o -*~ -aclocal.m4 -autom4te.cache -compile -config.guess -config.log -config.status -config.sub -configure -depcomp -install-sh -libtool -ltmain.sh -missing -stamp-h? -config.h -config.h.in -.*.swp -demos/*-test -demos/checkerboard -demos/clip-in -demos/linear-gradient -demos/quad2quad -demos/scale -demos/dither -pixman/pixman-srgb.c -pixman/pixman-version.h -test/*-test -test/affine-bench -test/alpha-loop -test/alphamap -test/check-formats -test/clip-in -test/composite -test/infinite-loop -test/lowlevel-blt-bench -test/radial-invalid -test/region-translate -test/scaling-bench -test/trap-crasher -*.pdb -*.dll -*.lib -*.ilk -*.obj -*.exe diff --git a/vendor/pixman/.gitlab-ci.yml b/vendor/pixman/.gitlab-ci.yml deleted file mode 100644 index 25c73df12..000000000 --- a/vendor/pixman/.gitlab-ci.yml +++ /dev/null @@ -1,19 +0,0 @@ -image: fedora:28 - -autotools-build: - script: - - dnf -y install dnf-plugins-core - - dnf -y groupinstall buildsys-build - - dnf -y builddep pixman - - ./autogen.sh - - make -sj4 check - -meson-build: - script: - - dnf -y install dnf-plugins-core - - dnf -y groupinstall buildsys-build - - dnf -y builddep pixman - - dnf -y install ninja-build - - python3 -m pip install meson>=0.52.1 - - meson build - - ninja -C build test diff --git a/vendor/pixman/AUTHORS b/vendor/pixman/AUTHORS deleted file mode 100644 index e69de29bb..000000000 diff --git a/vendor/pixman/CODING_STYLE b/vendor/pixman/CODING_STYLE deleted file mode 100644 index 9f5171d10..000000000 --- a/vendor/pixman/CODING_STYLE +++ /dev/null @@ -1,199 +0,0 @@ -Pixman coding style. -==================== - -The pixman coding style is close to cairo's with one exception: braces -go on their own line, rather than on the line of the if/while/for: - - if (condition) - { - do_something(); - do_something_else(); - } - -not - - if (condition) { - do_something(); - do_something_else(); - } - - - -Indentation -=========== - -Each new level is indented four spaces: - - if (condition) - do_something(); - -This may be achieved with space characters or with a combination of -tab characters and space characters. Tab characters are interpreted as - - Advance to the next column which is a multiple of 8. - - -Names -===== - -In all names, words are separated with underscores. Do not use -CamelCase for any names. - -Macros have ALL_CAPITAL_NAMES - -Type names are in lower case and end with "_t". For example -pixman_image_t. - -Labels, functions and variables have lower case names. - - -Braces -====== - -Braces always go on their own line: - - if (condition) - { - do_this (); - do_that (); - } - else - { - do_the_other (); - } - -Rules for braces and substatements of if/while/for/do: - -* If a substatement spans multiple lines, then there must be braces - around it. - -* If the condition of an if/while/for spans multiple lines, then - braces must be used for the substatements. - -* If one substatement of an if statement has braces, then the other - must too. - -* Otherwise, don't add braces. - - -Comments -======== - -For comments either like this: - - /* One line comment */ - -or like this: - - /* This is a multi-line comment - * - * It extends over multiple lines - */ - -Generally comments should say things that aren't clear from the code -itself. If too many comments say obvious things, then people will just -stop reading all comments, including the good ones. - - -Whitespace -========== - -* Put a single space after commas - -* Put spaces around arithmetic operators such a +, -, *, /: - - y * stride + x - - x / unit_x - -* Do not put spaces after the address-of operator, the * when used as - a pointer derefernce or the ! and ~ operators: - - &foo; - - ~0x00000000 - - !condition - - *result = 100 - -* Break up long lines (> ~80 characters) and use whitespace to align - things nicely. This is one way: - - some_very_long_function name ( - implementation, op, src, mask, dest, - src_x, src_y, mask_x, mask_y, dest_x, dest_y, - width, height); - - This is another: - - some_very_long_function_name (implementation, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); - -* Separate logically distinct chunks with a single newline. This - obviously applies between functions, but also applies within a - function or block or structure definition. - -* Use a newline after a block of variable declarations. - -* Use a single space before a left parenthesis, except where the - standard will not allow it, (eg. when defining a parameterized macro). - -* Don't eliminate newlines just because things would still fit on one - line. This breaks the expected visual structure of the code making - it much harder to read and understand: - - if (condition) foo (); else bar (); /* Yuck! */ - - -Function Definitions -==================== - -Function definitions should take the following form: - - void - my_function (int argument) - { - do_my_things (); - } - -If all the parameters to a function fit naturally on one line, format -them that way. Otherwise, put one argument on each line, adding -whitespace so that the parameter names are aligned with each other. - -I.e., do either this: - - void - short_arguments (const char *str, int x, int y, int z) - { - } - -or this: - - void - long_arguments (const char *char_star_arg, - int int_arg, - double *double_star_arg, - double double_arg) - { - } - - -Mode lines -========== - -Given the rules above, what is the best way to simplify one's life as -a code monkey? Get your editor to do most of the tedious work of -beautifying your code! - -As a reward for reading this far, here are some mode lines for the more -popular editors: -/* - * vim:sw=4:sts=4:ts=8:tw=78:fo=tcroq:cindent:cino=\:0,(0 - * vim:isk=a-z,A-Z,48-57,_,.,-,> - */ - diff --git a/vendor/pixman/COPYING b/vendor/pixman/COPYING deleted file mode 100644 index 6168dea56..000000000 --- a/vendor/pixman/COPYING +++ /dev/null @@ -1,42 +0,0 @@ -The following is the MIT license, agreed upon by most contributors. -Copyright holders of new code should use this license statement where -possible. They may also add themselves to the list below. - -/* - * Copyright 1987, 1988, 1989, 1998 The Open Group - * Copyright 1987, 1988, 1989 Digital Equipment Corporation - * Copyright 1999, 2004, 2008 Keith Packard - * Copyright 2000 SuSE, Inc. - * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. - * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. - * Copyright 2004 Nicholas Miell - * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech - * Copyright 2005 Trolltech AS - * Copyright 2007 Luca Barbato - * Copyright 2008 Aaron Plattner, NVIDIA Corporation - * Copyright 2008 Rodrigo Kumpera - * Copyright 2008 André Tupinambá - * Copyright 2008 Mozilla Corporation - * Copyright 2008 Frederic Plourde - * Copyright 2009, Oracle and/or its affiliates. All rights reserved. - * Copyright 2009, 2010 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ diff --git a/vendor/pixman/ChangeLog b/vendor/pixman/ChangeLog deleted file mode 100644 index e69de29bb..000000000 diff --git a/vendor/pixman/INSTALL b/vendor/pixman/INSTALL deleted file mode 100644 index 5458714e1..000000000 --- a/vendor/pixman/INSTALL +++ /dev/null @@ -1,234 +0,0 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006 Free Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - -Basic Installation -================== - -Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. - -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - diff --git a/vendor/pixman/Makefile.am b/vendor/pixman/Makefile.am deleted file mode 100644 index 048fc8d8a..000000000 --- a/vendor/pixman/Makefile.am +++ /dev/null @@ -1,143 +0,0 @@ -SUBDIRS = pixman demos test - -pkgconfigdir=$(libdir)/pkgconfig -pkgconfig_DATA=pixman-1.pc - -$(pkgconfig_DATA): pixman-1.pc.in - -snapshot: - distdir="$(distdir)-`date '+%Y%m%d'`"; \ - test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \ - $(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist - -GPGKEY=3892336E -USERNAME=$$USER -RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi) -RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org -RELEASE_CAIRO_DIR = /srv/cairo.freedesktop.org/www/$(RELEASE_OR_SNAPSHOT)s -RELEASE_CAIRO_URL = https://cairographics.org/$(RELEASE_OR_SNAPSHOT)s -RELEASE_XORG_URL = https://www.x.org/releases/individual/lib -RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org -RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib -RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org - -EXTRA_DIST = \ - Makefile.win32 \ - Makefile.win32.common \ - meson.build \ - meson_options.txt \ - neon-test.S \ - a64-neon-test.S \ - arm-simd-test.S \ - $(NULL) - -tar_gz = $(PACKAGE)-$(VERSION).tar.gz -tar_xz = $(PACKAGE)-$(VERSION).tar.xz - -sha512_tgz = $(tar_gz).sha512 -sha256_tgz = $(tar_gz).sha256 - -sha512_txz = $(tar_xz).sha512 -sha256_txz = $(tar_xz).sha256 - -gpg_file = $(sha512_tgz).asc - -$(sha512_tgz): $(tar_gz) - sha512sum $^ > $@ - -$(sha256_tgz): $(tar_gz) - sha256sum $^ > $@ - -$(sha512_txz): $(tar_xz) - sha512sum $^ > $@ - -$(sha256_txz): $(tar_xz) - sha256sum $^ > $@ - -$(gpg_file): $(sha512_tgz) - @echo "Please enter your GPG password to sign the checksum." - gpg --armor --sign $^ - -HASHFILES = $(sha512_tgz) $(sha512_txz) $(sha256_tgz) $(sha256_txz) - -release-verify-newer: - @echo -n "Checking that no $(VERSION) release already exists at $(RELEASE_XORG_HOST)..." - @ssh $(RELEASE_XORG_HOST) test ! -e $(RELEASE_XORG_DIR)/$(tar_gz) \ - || (echo "Ouch." && echo "Found: $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)/$(tar_gz)" \ - && echo "Refusing to try to generate a new release of the same name." \ - && false) - @ssh $(RELEASE_CAIRO_HOST) test ! -e $(RELEASE_CAIRO_DIR)/$(tar_gz) \ - || (echo "Ouch." && echo "Found: $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)/$(tar_gz)" \ - && echo "Refusing to try to generate a new release of the same name." \ - && false) - @echo "Good." - -release-remove-old: - $(RM) $(tar_gz) $(tar_xz) $(HASHFILES) $(gpg_file) - -ensure-prev: - @if [ "$(PREV)" = "" ]; then \ - echo "" && \ - echo "You must set the PREV variable on the make command line to" && \ - echo "the last version." && \ - echo "" && \ - echo "For example:" && \ - echo " make PREV=0.7.3" && \ - echo "" && \ - false; \ - fi - -release-check: ensure-prev release-verify-newer release-remove-old distcheck - -release-tag: - git tag -u $(GPGKEY) -m "$(PACKAGE) $(VERSION) release" $(PACKAGE)-$(VERSION) - -release-upload: release-check $(tar_gz) $(tar_xz) $(sha512_tgz) $(sha512_txz) $(sha256_tgz) $(gpg_file) - scp $(tar_gz) $(sha512_tgz) $(gpg_file) $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR) - scp $(tar_gz) $(tar_xz) $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR) - ssh $(RELEASE_CAIRO_HOST) "rm -f $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-[0-9]* && ln -s $(tar_gz) $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-$(VERSION)" - -RELEASE_TYPE = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo "stable release in the" ; else echo "development snapshot leading up to a stable"; fi) - -release-publish-message: $(HASHFILES) ensure-prev - @echo "Please follow the instructions in RELEASING to push stuff out and" - @echo "send out the announcement mails. Here is the excerpt you need:" - @echo "" - @echo "Lists: $(RELEASE_ANNOUNCE_LIST)" - @echo "Subject: [ANNOUNCE] $(PACKAGE) release $(VERSION) now available" - @echo "============================== CUT HERE ==============================" - @echo "A new $(PACKAGE) release $(VERSION) is now available. This is a $(RELEASE_TYPE)" - @echo "" - @echo "tar.gz:" - @echo " $(RELEASE_CAIRO_URL)/$(tar_gz)" - @echo " $(RELEASE_XORG_URL)/$(tar_gz)" - @echo "" - @echo "tar.xz:" - @echo " $(RELEASE_XORG_URL)/$(tar_xz)" - @echo "" - @echo "Hashes:" - @echo -n " SHA256: " - @cat $(sha256_tgz) - @echo -n " SHA256: " - @cat $(sha256_txz) - @echo -n " SHA512: " - @cat $(sha512_tgz) - @echo -n " SHA512: " - @cat $(sha512_txz) - @echo "" - @echo "GPG signature:" - @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)" - @echo " (signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)" - @echo "" - @echo "Git:" - @echo " https://gitlab.freedesktop.org/pixman/pixman.git" - @echo " tag: $(PACKAGE)-$(VERSION)" - @echo "" - @echo "Log:" - @git log --no-merges "$(PACKAGE)-$(PREV)".."$(PACKAGE)-$(VERSION)" | git shortlog | awk '{ printf "\t"; print ; }' | cut -b1-80 - @echo "============================== CUT HERE ==============================" - @echo "" - -release-publish: release-upload release-tag release-publish-message - -.PHONY: release-upload release-publish release-publish-message release-tag diff --git a/vendor/pixman/Makefile.win32 b/vendor/pixman/Makefile.win32 deleted file mode 100644 index c3ca3bc59..000000000 --- a/vendor/pixman/Makefile.win32 +++ /dev/null @@ -1,25 +0,0 @@ -default: all - -top_srcdir = . -include $(top_srcdir)/Makefile.win32.common - -all: pixman test - -pixman: - @$(MAKE) -C pixman -f Makefile.win32 - -test: - @$(MAKE) -C test -f Makefile.win32 - -clean_r: - @$(MAKE) -C pixman -f Makefile.win32 clean - @$(MAKE) -C test -f Makefile.win32 clean - -check: - @$(MAKE) -C test -f Makefile.win32 check - - -clean: clean_r - - -.PHONY: all pixman test clean check diff --git a/vendor/pixman/Makefile.win32.common b/vendor/pixman/Makefile.win32.common deleted file mode 100644 index 1b2f89487..000000000 --- a/vendor/pixman/Makefile.win32.common +++ /dev/null @@ -1,73 +0,0 @@ -LIBRARY = pixman-1 - -ifeq ($(shell echo ""),) -# POSIX style shell -mkdir_p = mkdir -p $1 -rm = $(RM) $1 -echo = echo "$1" -else -# DOS/Windows style shell -mkdir_p = if not exist $(subst /,\,$1) md $(subst /,\,$1) -echo = echo $1 -rm = del $(subst /,\,$1) -endif - -CC = cl -LD = link -AR = lib -PERL = perl - -ifneq ($(shell echo ""),) -RM = del -endif - -ifeq ($(top_builddir),) -top_builddir = $(top_srcdir) -endif - -CFG_VAR = $(CFG) -ifeq ($(CFG_VAR),) -CFG_VAR = release -endif - -ifeq ($(CFG_VAR),debug) -CFG_CFLAGS = -MDd -Od -Zi -CFG_LDFLAGS = -DEBUG -else -CFG_CFLAGS = -MD -O2 -CFG_LDFLAGS = -endif - -# Package definitions, to be used instead of those provided in config.h -PKG_CFLAGS = -DPACKAGE=$(LIBRARY) -DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT="" - -BASE_CFLAGS = -nologo -I. -I$(top_srcdir) -I$(top_srcdir)/pixman - -PIXMAN_CFLAGS = $(BASE_CFLAGS) $(PKG_CFLAGS) $(CFG_CFLAGS) $(CFLAGS) -PIXMAN_LDFLAGS = -nologo $(CFG_LDFLAGS) $(LDFLAGS) -PIXMAN_ARFLAGS = -nologo $(LDFLAGS) - - -inform: -ifneq ($(CFG),release) -ifneq ($(CFG),debug) -ifneq ($(CFG),) - @echo "Invalid specified configuration option: "$(CFG)"." - @echo - @echo "Possible choices for configuration are 'release' and 'debug'" - @exit 1 -endif - @echo "Using default RELEASE configuration... (use CFG=release or CFG=debug)" -endif -endif - -$(CFG_VAR): - @$(call mkdir_p,$@) - -$(CFG_VAR)/%.obj: %.c $(libpixman_headers) | $(CFG_VAR) - $(CC) -c $(PIXMAN_CFLAGS) -Fo"$@" $< - -clean: inform $(CFG_VAR) - -$(call rm,$(CFG_VAR)/*.exe $(CFG_VAR)/*.ilk $(CFG_VAR)/*.lib $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb) - -.PHONY: inform clean diff --git a/vendor/pixman/NEWS b/vendor/pixman/NEWS deleted file mode 100644 index e69de29bb..000000000 diff --git a/vendor/pixman/README b/vendor/pixman/README deleted file mode 100644 index 961a8529b..000000000 --- a/vendor/pixman/README +++ /dev/null @@ -1,140 +0,0 @@ -Pixman -====== - -Pixman is a library that provides low-level pixel manipulation -features such as image compositing and trapezoid rasterization. - -Questions should be directed to the pixman mailing list: - - https://lists.freedesktop.org/mailman/listinfo/pixman - -You can also file bugs at - - https://gitlab.freedesktop.org/pixman/pixman/-/issues/new - -or submit improvements in form of a Merge Request via - - https://gitlab.freedesktop.org/pixman/pixman/-/merge_requests - -For real time discussions about pixman, feel free to join the IRC -channels #cairo and #xorg-devel on the FreeNode IRC network. - - -Contributing ------------- - -In order to contribute to pixman, you will need a working knowledge of -the git version control system. For a quick getting started guide, -there is the "Everyday Git With 20 Commands Or So guide" - - https://www.kernel.org/pub/software/scm/git/docs/everyday.html - -from the Git homepage. For more in depth git documentation, see the -resources on the Git community documentation page: - - https://git-scm.com/documentation - -Pixman uses the infrastructure from the freedesktop.org umbrella -project. For instructions about how to use the git service on -freedesktop.org, see: - - https://www.freedesktop.org/wiki/Infrastructure/git/Developers - -The Pixman master repository can be found at: - - https://gitlab.freedesktop.org/pixman/pixman - - -Sending patches ---------------- - -Patches should be submitted in form of Merge Requests via Gitlab. - -You will first need to create a fork of the main pixman repository at - - https://gitlab.freedesktop.org/pixman/pixman - -via the Fork button on the top right. Once that is done you can add your -personal repository as a remote to your local pixman development git checkout: - - git remote add my-gitlab git@gitlab.freedesktop.org:YOURUSERNAME/pixman.git - - git fetch my-gitlab - -Make sure to have added ssh keys to your gitlab profile at - - https://gitlab.freedesktop.org/profile/keys - -Once that is set up, the general workflow for sending patches is to create a -new local branch with your improvements and once it's ready push it to your -personal pixman fork: - - git checkout -b fix-some-bug - ... - git push my-gitlab - -The output of the `git push` command will include a link that allows you to -create a Merge Request against the official pixman repository. - -Whenever you make changes to your branch (add new commits or fix up commits) -you push them back to your personal pixman fork: - - git push -f my-gitlab - -If there is an open Merge Request Gitlab will automatically pick up the -changes from your branch and pixman developers can review them anew. - -In order for your patches to be accepted, please consider the -following guidelines: - - - At each point in the series, pixman should compile and the test - suite should pass. - - The exception here is if you are changing the test suite to - demonstrate a bug. In this case, make one commit that makes the - test suite fail due to the bug, and then another commit that fixes - the bug. - - You can run the test suite with - - make check - - if you built pixman with autotools or - - meson test -C builddir - - if you built pixman with meson. - - It will take around two minutes to run on a modern PC. - - - Follow the coding style described in the CODING_STYLE file - - - For bug fixes, include an update to the test suite to make sure - the bug doesn't reappear. - - - For new features, add tests of the feature to the test - suite. Also, add a program demonstrating the new feature to the - demos/ directory. - - - Write descriptive commit messages. Useful information to include: - - Benchmark results, before and after - - Description of the bug that was fixed - - Detailed rationale for any new API - - Alternative approaches that were rejected (and why they - don't work) - - If review comments were incorporated, a brief version - history describing what those changes were. - - - For big patch series, write an introductory post with an overall - description of the patch series, including benchmarks and - motivation. Each commit message should still be descriptive and - include enough information to understand why this particular commit - was necessary. - -Pixman has high standards for code quality and so almost everybody -should expect to have the first versions of their patches rejected. - -If you think that the reviewers are wrong about something, or that the -guidelines above are wrong, feel free to discuss the issue. The purpose -of the guidelines and code review is to ensure high code quality; it is -not an exercise in compliance. diff --git a/vendor/pixman/RELEASING b/vendor/pixman/RELEASING deleted file mode 100644 index e104bda9f..000000000 --- a/vendor/pixman/RELEASING +++ /dev/null @@ -1,59 +0,0 @@ -Here are the steps to follow to create a new pixman release: - -1) Ensure that there are no uncommitted changes or unpushed commits, - and that you are up to date with the latest commits in the central - repository. Here are a couple of useful commands: - - git diff (no output) - - git status (should report "nothing to commit") - - git log master...origin (no output; note: *3* dots) - -2) Increment pixman_(major|minor|micro) in configure.ac and meson.build - according to the directions in those files. - -3) Make sure that new version works, including - - - make distcheck passes - - - the X server still works with the new pixman version - installed - - - the cairo test suite hasn't gained any new failures compared - to last pixman version. - -4) Use "git commit" to record the changes made in step 2 and 3. - -5) Generate and publish the tar files by running - - make PREV= GPGKEY= release-publish - - If your freedesktop user name is different from your local one, - then also set the variable USER to your freedesktop user name. - -6) Run - - make release-publish-message - - to generate a draft release announcement. Edit it as appropriate and - send it to - - cairo-announce@cairographics.org - - pixman@lists.freedesktop.org - - xorg-announce@lists.freedesktop.org - -7) Increment pixman_micro to the next larger (odd) number in - configure.ac. Commit this change, and push all commits created - during this process using - - git push - git push --tags - - You must use "--tags" here; otherwise the new tag will not - be pushed out. - -8) Change the topic of the #cairo IRC channel on freenode to advertise - the new version. diff --git a/vendor/pixman/a64-neon-test.S b/vendor/pixman/a64-neon-test.S deleted file mode 100644 index 5d4a4eaa9..000000000 --- a/vendor/pixman/a64-neon-test.S +++ /dev/null @@ -1,5 +0,0 @@ -.text -.arch armv8-a -.altmacro -prfm pldl2strm, [x0] -xtn v0.8b, v0.8h diff --git a/vendor/pixman/arm-simd-test.S b/vendor/pixman/arm-simd-test.S deleted file mode 100644 index 910c814d6..000000000 --- a/vendor/pixman/arm-simd-test.S +++ /dev/null @@ -1,10 +0,0 @@ -.text -.arch armv6 -.object_arch armv4 -.arm -.altmacro -#ifndef __ARM_EABI__ -#error EABI is required (to be sure that calling conventions are compatible) -#endif -pld [r0] -uqadd8 r0, r0, r0 diff --git a/vendor/pixman/autogen.sh b/vendor/pixman/autogen.sh deleted file mode 100755 index fc34bd55c..000000000 --- a/vendor/pixman/autogen.sh +++ /dev/null @@ -1,14 +0,0 @@ -#! /bin/sh - -srcdir=`dirname $0` -test -z "$srcdir" && srcdir=. - -ORIGDIR=`pwd` -cd $srcdir - -autoreconf -v --install || exit 1 -cd $ORIGDIR || exit $? - -if test -z "$NOCONFIGURE"; then - $srcdir/configure "$@" -fi diff --git a/vendor/pixman/configure.ac b/vendor/pixman/configure.ac deleted file mode 100644 index b81d89843..000000000 --- a/vendor/pixman/configure.ac +++ /dev/null @@ -1,1199 +0,0 @@ -dnl Copyright 2005 Red Hat, Inc. -dnl -dnl Permission to use, copy, modify, distribute, and sell this software and its -dnl documentation for any purpose is hereby granted without fee, provided that -dnl the above copyright notice appear in all copies and that both that -dnl copyright notice and this permission notice appear in supporting -dnl documentation, and that the name of Red Hat not be used in -dnl advertising or publicity pertaining to distribution of the software without -dnl specific, written prior permission. Red Hat makes no -dnl representations about the suitability of this software for any purpose. It -dnl is provided "as is" without express or implied warranty. -dnl -dnl RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -dnl INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO -dnl EVENT SHALL RED HAT BE LIABLE FOR ANY SPECIAL, INDIRECT OR -dnl CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, -dnl DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -dnl TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -dnl PERFORMANCE OF THIS SOFTWARE. -dnl -dnl Process this file with autoconf to create configure. - -AC_PREREQ([2.57]) - -# Pixman versioning scheme -# -# - The version in git has an odd MICRO version number -# -# - Released versions, both development and stable, have an -# even MICRO version number -# -# - Released development versions have an odd MINOR number -# -# - Released stable versions have an even MINOR number -# -# - Versions that break ABI must have a new MAJOR number -# -# - If you break the ABI, then at least this must be done: -# -# - increment MAJOR -# -# - In the first development release where you break ABI, find -# all instances of "pixman-n" and change them to pixman-(n+1) -# -# This needs to be done at least in -# configure.ac -# all Makefile.am's -# pixman-n.pc.in -# -# This ensures that binary incompatible versions can be installed -# in parallel. See http://www106.pair.com/rhp/parallel.html for -# more information -# - -m4_define([pixman_major], 0) -m4_define([pixman_minor], 42) -m4_define([pixman_micro], 3) - -m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) - -AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman) -AM_INIT_AUTOMAKE([foreign dist-xz]) - -# Suppress verbose compile lines -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - -AC_CONFIG_HEADERS(pixman-config.h) - -AC_CANONICAL_HOST - -test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS. - -AC_PROG_CC -AM_PROG_AS -AC_PROG_LIBTOOL -AC_CHECK_FUNCS([getisax]) -AC_C_BIGENDIAN -AC_C_INLINE - -dnl PIXMAN_LINK_WITH_ENV(env-setup, program, true-action, false-action) -dnl -dnl Compiles and links the given program in the environment setup by env-setup -dnl and executes true-action on success and false-action on failure. -AC_DEFUN([PIXMAN_LINK_WITH_ENV],[dnl - save_CFLAGS="$CFLAGS" - save_LDFLAGS="$LDFLAGS" - save_LIBS="$LIBS" - CFLAGS="" - LDFLAGS="" - LIBS="" - $1 - CFLAGS="$save_CFLAGS $CFLAGS" - LDFLAGS="$save_LDFLAGS $LDFLAGS" - LIBS="$save_LIBS $LIBS" - AC_LINK_IFELSE( - [AC_LANG_SOURCE([$2])], - [pixman_cc_stderr=`test -f conftest.err && cat conftest.err` - pixman_cc_flag=yes], - [pixman_cc_stderr=`test -f conftest.err && cat conftest.err` - pixman_cc_flag=no]) - - if test "x$pixman_cc_stderr" != "x"; then - pixman_cc_flag=no - fi - - if test "x$pixman_cc_flag" = "xyes"; then - ifelse([$3], , :, [$3]) - else - ifelse([$4], , :, [$4]) - fi - CFLAGS="$save_CFLAGS" - LDFLAGS="$save_LDFLAGS" - LIBS="$save_LIBS" -]) - -dnl Find a -Werror for catching warnings. -WERROR= -for w in -Werror -errwarn; do - if test "z$WERROR" = "z"; then - AC_MSG_CHECKING([whether the compiler supports $w]) - PIXMAN_LINK_WITH_ENV( - [CFLAGS=$w], - [int main(int c, char **v) { (void)c; (void)v; return 0; }], - [WERROR=$w; yesno=yes], [yesno=no]) - AC_MSG_RESULT($yesno) - fi -done - -dnl PIXMAN_CHECK_CFLAG(flag, [program]) -dnl Adds flag to CFLAGS if the given program links without warnings or errors. -AC_DEFUN([PIXMAN_CHECK_CFLAG], [dnl - AC_MSG_CHECKING([whether the compiler supports $1]) - PIXMAN_LINK_WITH_ENV( - [CFLAGS="$WERROR $1"], - [$2 - int main(int c, char **v) { (void)c; (void)v; return 0; } - ], - [_yesno=yes], - [_yesno=no]) - if test "x$_yesno" = xyes; then - CFLAGS="$CFLAGS $1" - fi - AC_MSG_RESULT($_yesno) -]) - -AC_CHECK_SIZEOF(long) - -# Checks for Sun Studio compilers -AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) -AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"]) - -# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC -# if we're using Sun Studio and neither the user nor a config.site -# has set CFLAGS. -if test $SUNCC = yes && \ - test "x$test_CFLAGS" = "x" && \ - test "$CFLAGS" = "-g" -then - CFLAGS="-O -g" -fi - -# -# We ignore pixman_major in the version here because the major version should -# always be encoded in the actual library name. Ie., the soname is: -# -# pixman-$(pixman_major).0.minor.micro -# -m4_define([lt_current], [pixman_minor]) -m4_define([lt_revision], [pixman_micro]) -m4_define([lt_age], [pixman_minor]) - -LT_VERSION_INFO="lt_current:lt_revision:lt_age" - -PIXMAN_VERSION_MAJOR=pixman_major() -AC_SUBST(PIXMAN_VERSION_MAJOR) -PIXMAN_VERSION_MINOR=pixman_minor() -AC_SUBST(PIXMAN_VERSION_MINOR) -PIXMAN_VERSION_MICRO=pixman_micro() -AC_SUBST(PIXMAN_VERSION_MICRO) - -AC_SUBST(LT_VERSION_INFO) - -# Check for dependencies - -PIXMAN_CHECK_CFLAG([-Wall]) -PIXMAN_CHECK_CFLAG([-Wdeclaration-after-statement]) -PIXMAN_CHECK_CFLAG([-Wno-unused-local-typedefs]) -PIXMAN_CHECK_CFLAG([-fno-strict-aliasing]) - -dnl ========================================================================= -dnl OpenMP for the test suite? -dnl - -# Check for OpenMP support only when autoconf support that (require autoconf >=2.62) -OPENMP_CFLAGS= -m4_ifdef([AC_OPENMP], [AC_OPENMP]) - -if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then - AC_MSG_WARN([OpenMP support requested but found unsupported]) -fi - -dnl May not fail to link without -Wall -Werror added -dnl So try to link only when openmp is supported -dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used -if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then - m4_define([openmp_test_program],[dnl - #include - - extern unsigned int lcg_seed; - #pragma omp threadprivate(lcg_seed) - unsigned int lcg_seed; - - unsigned function(unsigned a, unsigned b) - { - lcg_seed ^= b; - return ((a + b) ^ a ) + lcg_seed; - } - - int main(int argc, char **argv) - { - int i; - int n1 = 0, n2 = argc; - unsigned checksum = 0; - int verbose = argv != NULL; - unsigned (*test_function)(unsigned, unsigned); - test_function = function; - #pragma omp parallel for reduction(+:checksum) default(none) \ - shared(n1, n2, test_function, verbose) - for (i = n1; i < n2; i++) - { - unsigned crc = test_function (i, 0); - if (verbose) - printf ("%d: %08X\n", i, crc); - checksum += crc; - } - printf("%u\n", checksum); - return 0; - } - ]) - - PIXMAN_LINK_WITH_ENV( - [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"], - [openmp_test_program], - [have_openmp=yes], - [have_openmp=no]) - if test "x$have_openmp" = "xyes" ; then - AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) - fi -fi -AC_SUBST(OPENMP_CFLAGS) - -dnl ========================================================================= -dnl -fvisibility stuff - -PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl -#if defined(__GNUC__) && (__GNUC__ >= 4) -#ifdef _WIN32 -#error Have -fvisibility but it is ignored and generates a warning -#endif -#else -#error Need GCC 4.0 for visibility -#endif -]) - -PIXMAN_CHECK_CFLAG([-xldscope=hidden], [dnl -#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) -#else -#error Need Sun Studio 8 for visibility -#endif -]) - -dnl =========================================================================== -dnl Check for Loongson Multimedia Instructions - -if test "x$LS_CFLAGS" = "x" ; then - LS_CFLAGS="-mloongson-mmi" -fi - -have_loongson_mmi=no -AC_MSG_CHECKING(whether to use Loongson MMI assembler) - -xserver_save_CFLAGS=$CFLAGS -CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir" -AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifndef __mips_loongson_vector_rev -#error "Loongson Multimedia Instructions are only available on Loongson" -#endif -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) -#error "Need GCC >= 4.4 for Loongson MMI compilation" -#endif -#include "pixman/loongson-mmintrin.h" -int main () { - union { - __m64 v; - char c[8]; - } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; - int b = 4; - __m64 c = _mm_srli_pi16 (a.v, b); - return 0; -}]])], have_loongson_mmi=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(loongson-mmi, - [AC_HELP_STRING([--disable-loongson-mmi], - [disable Loongson MMI fast paths])], - [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto]) - -if test $enable_loongson_mmi = no ; then - have_loongson_mmi=disabled -fi - -if test $have_loongson_mmi = yes ; then - AC_DEFINE(USE_LOONGSON_MMI, 1, [use Loongson Multimedia Instructions]) -else - LS_CFLAGS= -fi - -AC_MSG_RESULT($have_loongson_mmi) -if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then - AC_MSG_ERROR([Loongson MMI not detected]) -fi - -AM_CONDITIONAL(USE_LOONGSON_MMI, test $have_loongson_mmi = yes) - -dnl =========================================================================== -dnl Check for MMX - -if test "x$MMX_CFLAGS" = "x" ; then - if test "x$SUNCC" = "xyes"; then - # Sun Studio doesn't have an -xarch=mmx flag, so we have to use sse - # but if we're building 64-bit, mmx & sse support is on by default and - # -xarch=sse throws an error instead - if test "$AMD64_ABI" = "no" ; then - MMX_CFLAGS="-xarch=sse" - fi - else - MMX_CFLAGS="-mmmx -Winline" - fi -fi - -have_mmx_intrinsics=no -AC_MSG_CHECKING(whether to use MMX intrinsics) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="$MMX_CFLAGS $CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) -#error "Need GCC >= 3.4 for MMX intrinsics" -#endif -#include -#include - -/* Check support for block expressions */ -#define _mm_shuffle_pi16(A, N) \ - ({ \ - __m64 ret; \ - \ - /* Some versions of clang will choke on K */ \ - asm ("pshufw %2, %1, %0\n\t" \ - : "=y" (ret) \ - : "y" (A), "K" ((const int8_t)N) \ - ); \ - \ - ret; \ - }) - -int main () { - __m64 v = _mm_cvtsi32_si64 (1); - __m64 w; - - w = _mm_shuffle_pi16(v, 5); - - /* Some versions of clang will choke on this */ - asm ("pmulhuw %1, %0\n\t" - : "+y" (w) - : "y" (v) - ); - - return _mm_cvtsi64_si32 (v); -}]])], have_mmx_intrinsics=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(mmx, - [AC_HELP_STRING([--disable-mmx], - [disable x86 MMX fast paths])], - [enable_mmx=$enableval], [enable_mmx=auto]) - -if test $enable_mmx = no ; then - have_mmx_intrinsics=disabled -fi - -if test $have_mmx_intrinsics = yes ; then - AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics]) -else - MMX_CFLAGS= -fi - -AC_MSG_RESULT($have_mmx_intrinsics) -if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then - AC_MSG_ERROR([x86 MMX intrinsics not detected]) -fi - -AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes) - -dnl =========================================================================== -dnl Check for SSE2 - -if test "x$SSE2_CFLAGS" = "x" ; then - if test "x$SUNCC" = "xyes"; then - # SSE2 is enabled by default in the Sun Studio 64-bit environment - if test "$AMD64_ABI" = "no" ; then - SSE2_CFLAGS="-xarch=sse2" - fi - else - SSE2_CFLAGS="-msse2 -Winline" - fi -fi - -have_sse2_intrinsics=no -AC_MSG_CHECKING(whether to use SSE2 intrinsics) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="$SSE2_CFLAGS $CFLAGS" - -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)) -# if !defined(__amd64__) && !defined(__x86_64__) -# error "Need GCC >= 4.2 for SSE2 intrinsics on x86" -# endif -#endif -#include -#include -#include -int param; -int main () { - __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c; - c = _mm_xor_si128 (a, b); - return _mm_cvtsi128_si32(c); -}]])], have_sse2_intrinsics=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(sse2, - [AC_HELP_STRING([--disable-sse2], - [disable SSE2 fast paths])], - [enable_sse2=$enableval], [enable_sse2=auto]) - -if test $enable_sse2 = no ; then - have_sse2_intrinsics=disabled -fi - -if test $have_sse2_intrinsics = yes ; then - AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics]) -fi - -AC_MSG_RESULT($have_sse2_intrinsics) -if test $enable_sse2 = yes && test $have_sse2_intrinsics = no ; then - AC_MSG_ERROR([SSE2 intrinsics not detected]) -fi - -AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes) - -dnl =========================================================================== -dnl Check for SSSE3 - -if test "x$SSSE3_CFLAGS" = "x" ; then - SSSE3_CFLAGS="-mssse3 -Winline" -fi - -have_ssse3_intrinsics=no -AC_MSG_CHECKING(whether to use SSSE3 intrinsics) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="$SSSE3_CFLAGS $CFLAGS" - -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#include -#include -#include -#include -int param; -int main () { - __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c; - c = _mm_maddubs_epi16 (a, b); - return _mm_cvtsi128_si32(c); -}]])], have_ssse3_intrinsics=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(ssse3, - [AC_HELP_STRING([--disable-ssse3], - [disable SSSE3 fast paths])], - [enable_ssse3=$enableval], [enable_ssse3=auto]) - -if test $enable_ssse3 = no ; then - have_ssse3_intrinsics=disabled -fi - -if test $have_ssse3_intrinsics = yes ; then - AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics]) -fi - -AC_MSG_RESULT($have_ssse3_intrinsics) -if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then - AC_MSG_ERROR([SSSE3 intrinsics not detected]) -fi - -AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes) - -dnl =========================================================================== -dnl Other special flags needed when building code using x86 ISA extensions -case $host_os in - solaris*) - # When building Solaris binaries, apply a mapfile to ensure that the - # binaries aren't flagged as only able to run on MMX/SSE/SSSE3 capable - # CPUs since they check at runtime before using those instructions. - # Not all linkers grok the mapfile format so we check for that first. - if test "$host_cpu" = "i386" -o "$host_cpu" = "x86_64"; then - use_hwcap_mapfile=no - AC_MSG_CHECKING(whether to use a hardware capability map file) - hwcap_save_LDFLAGS="$LDFLAGS" - HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile' - LDFLAGS="$LDFLAGS -Wl,-M,${srcdir}/pixman/solaris-hwcap.mapfile" - AC_LINK_IFELSE([AC_LANG_SOURCE([[int main() { return 0; }]])], - use_hwcap_mapfile=yes, - HWCAP_LDFLAGS="") - LDFLAGS="$hwcap_save_LDFLAGS" - AC_MSG_RESULT($use_hwcap_mapfile) - fi - if test "x$MMX_LDFLAGS" = "x" ; then - MMX_LDFLAGS="$HWCAP_LDFLAGS" - fi - if test "x$SSE2_LDFLAGS" = "x" ; then - SSE2_LDFLAGS="$HWCAP_LDFLAGS" - fi - if test "x$SSSE3_LDFLAGS" = "x" ; then - SSSE3_LDFLAGS="$HWCAP_LDFLAGS" - fi - ;; -esac - -AC_SUBST(LS_CFLAGS) -AC_SUBST(IWMMXT_CFLAGS) -AC_SUBST(MMX_CFLAGS) -AC_SUBST(MMX_LDFLAGS) -AC_SUBST(SSE2_CFLAGS) -AC_SUBST(SSE2_LDFLAGS) -AC_SUBST(SSSE3_CFLAGS) -AC_SUBST(SSSE3_LDFLAGS) - -dnl =========================================================================== -dnl Check for VMX/Altivec -if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then - VMX_CFLAGS="-faltivec" -else - VMX_CFLAGS="-maltivec -mabi=altivec" -fi - -have_vmx_intrinsics=no -AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="$VMX_CFLAGS $CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) -#error "Need GCC >= 3.4 for sane altivec support" -#endif -#include -int main () { - vector unsigned int v = vec_splat_u32 (1); - v = vec_sub (v, v); - return 0; -}]])], have_vmx_intrinsics=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(vmx, - [AC_HELP_STRING([--disable-vmx], - [disable VMX fast paths])], - [enable_vmx=$enableval], [enable_vmx=auto]) - -if test $enable_vmx = no ; then - have_vmx_intrinsics=disabled -fi - -if test $have_vmx_intrinsics = yes ; then - AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics]) -else - VMX_CFLAGS= -fi - -AC_MSG_RESULT($have_vmx_intrinsics) -if test $enable_vmx = yes && test $have_vmx_intrinsics = no ; then - AC_MSG_ERROR([VMX intrinsics not detected]) -fi - -AC_SUBST(VMX_CFLAGS) - -AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes) - -dnl ========================================================================== -dnl Check if assembler is gas compatible and supports ARM SIMD instructions -have_arm_simd=no -AC_MSG_CHECKING(whether to use ARM SIMD assembler) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="-x assembler-with-cpp $CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -.text -.arch armv6 -.object_arch armv4 -.arm -.altmacro -#ifndef __ARM_EABI__ -#error EABI is required (to be sure that calling conventions are compatible) -#endif -pld [r0] -uqadd8 r0, r0, r0]])], have_arm_simd=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(arm-simd, - [AC_HELP_STRING([--disable-arm-simd], - [disable ARM SIMD fast paths])], - [enable_arm_simd=$enableval], [enable_arm_simd=auto]) - -if test $enable_arm_simd = no ; then - have_arm_simd=disabled -fi - -if test $have_arm_simd = yes ; then - AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations]) -fi - -AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes) - -AC_MSG_RESULT($have_arm_simd) -if test $enable_arm_simd = yes && test $have_arm_simd = no ; then - AC_MSG_ERROR([ARM SIMD intrinsics not detected]) -fi - -dnl ========================================================================== -dnl Check if assembler is gas compatible and supports NEON instructions -have_arm_neon=no -AC_MSG_CHECKING(whether to use ARM NEON assembler) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="-x assembler-with-cpp $CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -.text -.fpu neon -.arch armv7a -.object_arch armv4 -.eabi_attribute 10, 0 -.arm -.altmacro -#ifndef __ARM_EABI__ -#error EABI is required (to be sure that calling conventions are compatible) -#endif -pld [r0] -vmovn.u16 d0, q0]])], have_arm_neon=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(arm-neon, - [AC_HELP_STRING([--disable-arm-neon], - [disable ARM NEON fast paths])], - [enable_arm_neon=$enableval], [enable_arm_neon=auto]) - -if test $enable_arm_neon = no ; then - have_arm_neon=disabled -fi - -if test $have_arm_neon = yes ; then - AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations]) -fi - -AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes) - -AC_MSG_RESULT($have_arm_neon) -if test $enable_arm_neon = yes && test $have_arm_neon = no ; then - AC_MSG_ERROR([ARM NEON intrinsics not detected]) -fi - -dnl ========================================================================== -dnl Check if assembler is gas compatible and supports ARM-a64 NEON instructions -have_arm_a64_neon=no -AC_MSG_CHECKING(whether to use ARM A64 NEON assembler) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="-x assembler-with-cpp $CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -.text -.arch armv8-a -.altmacro -prfm pldl2strm, [x0] -xtn v0.8b, v0.8h]])], have_arm_a64_neon=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(arm-a64-neon, - [AC_HELP_STRING([--disable-arm-a64-neon], - [disable ARM A64 NEON fast paths])], - [enable_arm_a64_neon=$enableval], [enable_arm_a64_neon=auto]) - -if test $enable_arm_a64_neon = no ; then - have_arm_a64_neon=disabled -fi - -if test $have_arm_a64_neon = yes ; then - AC_DEFINE(USE_ARM_A64_NEON, 1, [use ARM A64_NEON assembly optimizations]) -fi - -AM_CONDITIONAL(USE_ARM_A64_NEON, test $have_arm_a64_neon = yes) - -AC_MSG_RESULT($have_arm_a64_neon) -if test $enable_arm_a64_neon = yes && test $have_arm_a64_neon4 = no ; then - AC_MSG_ERROR([ARM A64 NEON intrinsics not detected]) -fi - -dnl =========================================================================== -dnl Check for IWMMXT - -AC_ARG_ENABLE(arm-iwmmxt, - [AC_HELP_STRING([--disable-arm-iwmmxt], - [disable ARM IWMMXT fast paths])], - [enable_iwmmxt=$enableval], [enable_iwmmxt=auto]) - -AC_ARG_ENABLE(arm-iwmmxt2, - [AC_HELP_STRING([--disable-arm-iwmmxt2], - [build ARM IWMMXT fast paths with -march=iwmmxt instead of -march=iwmmxt2])], - [enable_iwmmxt2=$enableval], [enable_iwmmxt2=auto]) - -if test "x$IWMMXT_CFLAGS" = "x" ; then - IWMMXT_CFLAGS="-flax-vector-conversions -Winline -march=iwmmxt" - if test $enable_iwmmxt2 != no ; then - IWMMXT_CFLAGS="${IWMMXT_CFLAGS}2" - fi -fi - -have_iwmmxt_intrinsics=no -AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS $IWMMXT_CFLAGS" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#ifndef __arm__ -#error "IWMMXT is only available on ARM" -#endif -#ifndef __IWMMXT__ -#error "IWMMXT not enabled (with -march=iwmmxt)" -#endif -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)) -#error "Need GCC >= 4.8 for IWMMXT intrinsics" -#endif -#include -int main () { - union { - __m64 v; - char c[8]; - } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; - int b = 4; - __m64 c = _mm_srli_si64 (a.v, b); -}]])], have_iwmmxt_intrinsics=yes) -CFLAGS=$xserver_save_CFLAGS - -if test $enable_iwmmxt = no ; then - have_iwmmxt_intrinsics=disabled -fi - -if test $have_iwmmxt_intrinsics = yes ; then - AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics]) -else - IWMMXT_CFLAGS= -fi - -AC_MSG_RESULT($have_iwmmxt_intrinsics) -if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then - AC_MSG_ERROR([IWMMXT intrinsics not detected]) -fi - -AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes) - -dnl ========================================================================== -dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions - -have_mips_dspr2=no -AC_MSG_CHECKING(whether to use MIPS DSPr2 assembler) -xserver_save_CFLAGS=$CFLAGS -CFLAGS="-mdspr2 $CFLAGS" - -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -#if !(defined(__mips__) && __mips_isa_rev >= 2) -#error MIPS DSPr2 is currently only available on MIPS32r2 platforms. -#endif -int -main () -{ - int c = 0, a = 0, b = 0; - __asm__ __volatile__ ( - "precr.qb.ph %[c], %[a], %[b] \n\t" - : [c] "=r" (c) - : [a] "r" (a), [b] "r" (b) - ); - return c; -}]])], have_mips_dspr2=yes) -CFLAGS=$xserver_save_CFLAGS - -AC_ARG_ENABLE(mips-dspr2, - [AC_HELP_STRING([--disable-mips-dspr2], - [disable MIPS DSPr2 fast paths])], - [enable_mips_dspr2=$enableval], [enable_mips_dspr2=auto]) - -if test $enable_mips_dspr2 = no ; then - have_mips_dspr2=disabled -fi - -if test $have_mips_dspr2 = yes ; then - AC_DEFINE(USE_MIPS_DSPR2, 1, [use MIPS DSPr2 assembly optimizations]) -fi - -AM_CONDITIONAL(USE_MIPS_DSPR2, test $have_mips_dspr2 = yes) - -AC_MSG_RESULT($have_mips_dspr2) -if test $enable_mips_dspr2 = yes && test $have_mips_dspr2 = no ; then - AC_MSG_ERROR([MIPS DSPr2 instructions not detected]) -fi - -dnl ========================================================================================= -dnl Check for GNU-style inline assembly support - -have_gcc_inline_asm=no -AC_MSG_CHECKING(whether to use GNU-style inline assembler) -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ -int main () { - /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */ - asm volatile ( "\tnop\n" : : : "cc", "memory" ); - return 0; -}]])], have_gcc_inline_asm=yes) - -AC_ARG_ENABLE(gcc-inline-asm, - [AC_HELP_STRING([--disable-gcc-inline-asm], - [disable GNU-style inline assembler])], - [enable_gcc_inline_asm=$enableval], [enable_gcc_inline_asm=auto]) - -if test $enable_gcc_inline_asm = no ; then - have_gcc_inline_asm=disabled -fi - -if test $have_gcc_inline_asm = yes ; then - AC_DEFINE(USE_GCC_INLINE_ASM, 1, [use GNU-style inline assembler]) -fi - -AC_MSG_RESULT($have_gcc_inline_asm) -if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then - AC_MSG_ERROR([GNU-style inline assembler not detected]) -fi - -AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes) - -dnl ============================================== -dnl Static test programs - -AC_ARG_ENABLE(static-testprogs, - [AC_HELP_STRING([--enable-static-testprogs], - [build test programs as static binaries [default=no]])], - [enable_static_testprogs=$enableval], [enable_static_testprogs=no]) - -TESTPROGS_EXTRA_LDFLAGS= -if test "x$enable_static_testprogs" = "xyes" ; then - TESTPROGS_EXTRA_LDFLAGS="-all-static" -fi -AC_SUBST(TESTPROGS_EXTRA_LDFLAGS) - -dnl ============================================== -dnl Timers - -AC_ARG_ENABLE(timers, - [AC_HELP_STRING([--enable-timers], - [enable TIMER_BEGIN and TIMER_END macros [default=no]])], - [enable_timers=$enableval], [enable_timers=no]) - -if test $enable_timers = yes ; then - AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros]) -fi -AC_SUBST(PIXMAN_TIMERS) - -dnl =================================== -dnl gnuplot - -AC_ARG_ENABLE(gnuplot, - [AC_HELP_STRING([--enable-gnuplot], - [enable output of filters that can be piped to gnuplot [default=no]])], - [enable_gnuplot=$enableval], [enable_gnuplot=no]) - -if test $enable_gnuplot = yes ; then - AC_DEFINE(PIXMAN_GNUPLOT, 1, [enable output that can be piped to gnuplot]) -fi -AC_SUBST(PIXMAN_GNUPLOT) - -dnl =================================== -dnl GTK+ - -AC_ARG_ENABLE(gtk, - [AC_HELP_STRING([--enable-gtk], - [enable tests using GTK+ [default=auto]])], - [enable_gtk=$enableval], [enable_gtk=auto]) - -PKG_PROG_PKG_CONFIG - -if test $enable_gtk = yes ; then - AC_CHECK_LIB([pixman-1], [pixman_version_string]) - PKG_CHECK_MODULES(GTK, [gtk+-3.0 pixman-1]) -fi - -if test $enable_gtk = auto ; then - AC_CHECK_LIB([pixman-1], [pixman_version_string], [enable_gtk=auto], [enable_gtk=no]) -fi - -if test $enable_gtk = auto ; then - PKG_CHECK_MODULES(GTK, [gtk+-3.0 pixman-1], [enable_gtk=yes], [enable_gtk=no]) -fi - -AM_CONDITIONAL(HAVE_GTK, [test "x$enable_gtk" = xyes]) - -AC_SUBST(GTK_CFLAGS) -AC_SUBST(GTK_LIBS) - -dnl ===================================== -dnl posix_memalign, sigaction, alarm, gettimeofday - -AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no) -if test x$have_posix_memalign = xyes; then - AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()]) -fi - -AC_CHECK_FUNC(sigaction, have_sigaction=yes, have_sigaction=no) -if test x$have_sigaction = xyes; then - AC_DEFINE(HAVE_SIGACTION, 1, [Whether we have sigaction()]) -fi - -AC_CHECK_FUNC(alarm, have_alarm=yes, have_alarm=no) -if test x$have_alarm = xyes; then - AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()]) -fi - -AC_CHECK_HEADER([sys/mman.h], - [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have ])]) - -AC_CHECK_FUNC(mmap, have_mmap=yes, have_mmap=no) -if test x$have_mmap = xyes; then - AC_DEFINE(HAVE_MMAP, 1, [Whether we have mmap()]) -fi - -AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no) -if test x$have_mprotect = xyes; then - AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()]) -fi - -AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no) -if test x$have_getpagesize = xyes; then - AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()]) -fi - -AC_CHECK_HEADER([fenv.h], - [AC_DEFINE(HAVE_FENV_H, [1], [Define to 1 if we have ])]) - -AC_CHECK_LIB(m, feenableexcept, have_feenableexcept=yes, have_feenableexcept=no) -if test x$have_feenableexcept = xyes; then - AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()]) -fi - -AC_CHECK_DECL([FE_DIVBYZERO], - [AC_DEFINE(HAVE_FEDIVBYZERO, 1, [Whether we have FE_DIVBYZERO])], - [], - [[#include ]]) - -AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no) -AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no) -if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then - AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()]) -fi - -dnl ===================================== -dnl Check for missing sqrtf() as, e.g., for Solaris 9 - -AC_SEARCH_LIBS([sqrtf], [m], [], - [AC_DEFINE([sqrtf], [sqrt], - [Define to sqrt if you do not have the `sqrtf' function.])]) - -dnl ===================================== -dnl Thread local storage - -AC_MSG_CHECKING(for thread local storage (TLS) support) -AC_CACHE_VAL(ac_cv_tls, [ - ac_cv_tls=none - keywords="__thread __declspec(thread)" - for kw in $keywords ; do - AC_TRY_COMPILE([ -#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) -#error This MinGW version has broken __thread support -#endif -#ifdef __OpenBSD__ -#error OpenBSD has broken __thread support -#endif - -int $kw test;], [], [ac_cv_tls=$kw; break]) - done -]) -AC_MSG_RESULT($ac_cv_tls) - -if test "$ac_cv_tls" != "none"; then - AC_DEFINE_UNQUOTED([TLS], $ac_cv_tls, [The compiler supported TLS storage class]) -fi - -dnl -dnl posix tls -dnl - -m4_define([pthread_test_program],AC_LANG_SOURCE([[dnl -#include -#include - -static pthread_once_t once_control = PTHREAD_ONCE_INIT; -static pthread_key_t key; - -static void -make_key (void) -{ - pthread_key_create (&key, NULL); -} - -int -main () -{ - void *value = NULL; - - if (pthread_once (&once_control, make_key) != 0) - { - value = NULL; - } - else - { - value = pthread_getspecific (key); - if (!value) - { - value = malloc (100); - pthread_setspecific (key, value); - } - } - return 0; -} -]])) - -AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl - if test "z$support_for_pthreads" != "zyes"; then - PIXMAN_LINK_WITH_ENV( - [$1], [pthread_test_program], - [PTHREAD_CFLAGS="$CFLAGS" - PTHREAD_LIBS="$LIBS" - PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthreads=yes]) - fi -]) - -support_for_pthreads=no - -AC_MSG_CHECKING(for pthreads) - -PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"]) -PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"]) -PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"]) - -if test $support_for_pthreads = yes; then - AC_DEFINE([HAVE_PTHREADS], [], [Whether pthreads is supported]) - if test $ac_cv_tls = none ; then - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - fi -fi - -AC_MSG_RESULT($support_for_pthreads) - -AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD) -AC_SUBST(HAVE_PTHREADS) -AC_SUBST(PTHREAD_LDFLAGS) -AC_SUBST(PTHREAD_LIBS) -AC_SUBST(PTHREAD_CFLAGS) - -dnl ===================================== -dnl __attribute__((constructor)) - -support_for_attribute_constructor=no - -AC_MSG_CHECKING(for __attribute__((constructor))) -AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)) -/* attribute 'constructor' is supported since gcc 2.7, but some compilers - * may only pretend to be gcc, so let's try to actually use it - */ -static int x = 1; -static void __attribute__((constructor)) constructor_function () { x = 0; } -int main (void) { return x; } -#else -#error not gcc or gcc version is older than 2.7 -#endif -]])], support_for_attribute_constructor=yes) - -if test x$support_for_attribute_constructor = xyes; then - AC_DEFINE([TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR], - [],[Whether the tool chain supports __attribute__((constructor))]) -fi - -AC_MSG_RESULT($support_for_attribute_constructor) -AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) - -dnl ===================================== -dnl __float128 - -support_for_float128=no - -AC_MSG_CHECKING(for __float128) -AC_LINK_IFELSE([AC_LANG_SOURCE([[ -__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } -]])], support_for_float128=yes) - -if test x$support_for_float128 = xyes; then - AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) -fi - -AC_MSG_RESULT($support_for_float128) - -dnl ===================================== -dnl __builtin_clz - -support_for_builtin_clz=no - -AC_MSG_CHECKING(for __builtin_clz) -AC_LINK_IFELSE([AC_LANG_SOURCE([[ -unsigned int x = 11; int main (void) { return __builtin_clz(x); } -]])], support_for_builtin_clz=yes) - -if test x$support_for_builtin_clz = xyes; then - AC_DEFINE([HAVE_BUILTIN_CLZ], [], [Whether the compiler supports __builtin_clz]) -fi - -AC_MSG_RESULT($support_for_builtin_clz) - -dnl ===================================== -dnl GCC vector extensions - -support_for_gcc_vector_extensions=no - -AC_MSG_CHECKING(for GCC vector extensions) -AC_LINK_IFELSE([AC_LANG_SOURCE([[ -unsigned int __attribute__ ((vector_size(16))) e, a, b; -int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; } -]])], support_for_gcc_vector_extensions=yes) - -if test x$support_for_gcc_vector_extensions = xyes; then - AC_DEFINE([HAVE_GCC_VECTOR_EXTENSIONS], [], - [Whether the compiler supports GCC vector extensions]) -fi - -AC_MSG_RESULT($support_for_gcc_vector_extensions) - -dnl ================== -dnl libpng - -AC_ARG_ENABLE(libpng, AS_HELP_STRING([--enable-libpng], [Build support for libpng (default: auto)]), - [have_libpng=$enableval], [have_libpng=auto]) - -case x$have_libpng in - xyes) PKG_CHECK_MODULES(PNG, [libpng]) ;; - xno) ;; - *) PKG_CHECK_MODULES(PNG, [libpng], have_libpng=yes, have_libpng=no) ;; -esac - -if test x$have_libpng = xyes; then - AC_DEFINE([HAVE_LIBPNG], [1], [Whether we have libpng]) -fi - -AC_SUBST(HAVE_LIBPNG) - -AC_OUTPUT([pixman-1.pc - pixman-1-uninstalled.pc - Makefile - pixman/Makefile - pixman/pixman-version.h - demos/Makefile - test/Makefile]) - -m4_if(m4_eval(pixman_minor % 2), [1], [ - echo - echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" - echo - echo " Thanks for testing this development snapshot of pixman. Please" - echo " report any problems you find, either by sending email to " - echo - echo " pixman@lists.freedesktop.org" - echo - echo " or by filing a bug at " - echo - echo " https://gitlab.freedesktop.org/pixman/pixman/-/issues/new " - echo - echo " If you are looking for a stable release of pixman, please note " - echo " that stable releases have _even_ minor version numbers. Ie., " - echo " pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a " - echo " development snapshot that may contain bugs and experimental " - echo " features. " - echo - echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" - echo -]) diff --git a/vendor/pixman/meson.build b/vendor/pixman/meson.build deleted file mode 100644 index 42dbe93d5..000000000 --- a/vendor/pixman/meson.build +++ /dev/null @@ -1,581 +0,0 @@ -# Copyright © 2018 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -project( - 'pixman', - ['c'], - version : '0.42.3', - license : 'MIT', - meson_version : '>= 0.52.0', - default_options : ['c_std=gnu99', 'buildtype=debugoptimized'], -) - -config = configuration_data() -cc = meson.get_compiler('c') -null_dep = dependency('', required : false) - -add_project_arguments( - cc.get_supported_arguments([ - '-Wdeclaration-after-statement', - '-fno-strict-aliasing', - '-fvisibility=hidden', - '-Wundef', - # -ftrapping-math is the default for gcc, but -fno-trapping-math is the - # default for clang. The FLOAT_IS_ZERO macro is used to guard against - # floating-point exceptions, however with -fno-trapping-math, the compiler - # can reorder floating-point operations so that they occur before the guard. - # Note, this function is ignored in clang < 10.0.0. - '-ftrapping-math' - ]), - language : ['c'] -) - -# GCC and Clang both ignore -Wno options that they don't recognize, so test for -# -W, then add -Wno- if it's ignored -foreach opt : ['unused-local-typedefs'] - if cc.has_argument('-W' + opt) - add_project_arguments(['-Wno-' + opt], language : ['c']) - endif -endforeach - -use_loongson_mmi = get_option('loongson-mmi') -have_loongson_mmi = false -loongson_mmi_flags = ['-mloongson-mmi'] -if not use_loongson_mmi.disabled() - if host_machine.cpu_family() == 'mips64' and cc.compiles(''' - #ifndef __mips_loongson_vector_rev - #error "Loongson Multimedia Instructions are only available on Loongson" - #endif - #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) - #error "Need GCC >= 4.4 for Loongson MMI compilation" - #endif - #include "pixman/loongson-mmintrin.h" - int main () { - union { - __m64 v; - char c[8]; - } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; - int b = 4; - __m64 c = _mm_srli_pi16 (a.v, b); - return 0; - }''', - args : loongson_mmi_flags, - include_directories : include_directories('.'), - name : 'Loongson MMI Intrinsic Support') - have_loongson_mmi = true - endif -endif - -if have_loongson_mmi - config.set10('USE_LOONGSON_MMI', true) -elif use_loongson_mmi.enabled() - error('Loongson MMI Support unavailable, but required') -endif - -use_mmx = get_option('mmx') -have_mmx = false -mmx_flags = [] - -if cc.get_id() == 'msvc' - mmx_flags = ['/w14710', '/w14714', '/wd4244'] -elif cc.get_id() == 'sun' - mmx_flags = ['-xarch=sse'] -else - mmx_flags = ['-mmmx', '-Winline'] -endif -if not use_mmx.disabled() - if host_machine.cpu_family() == 'x86_64' or cc.get_id() == 'msvc' - have_mmx = true - elif host_machine.cpu_family() == 'x86' and cc.compiles(''' - #include - #include - - /* Check support for block expressions */ - #define _mm_shuffle_pi16(A, N) \ - ({ \ - __m64 ret; \ - \ - /* Some versions of clang will choke on K */ \ - asm ("pshufw %2, %1, %0\n\t" \ - : "=y" (ret) \ - : "y" (A), "K" ((const int8_t)N) \ - ); \ - \ - ret; \ - }) - - int main () { - __m64 v = _mm_cvtsi32_si64 (1); - __m64 w; - - w = _mm_shuffle_pi16(v, 5); - - /* Some versions of clang will choke on this */ - asm ("pmulhuw %1, %0\n\t" - : "+y" (w) - : "y" (v) - ); - - return _mm_cvtsi64_si32 (v); - }''', - args : mmx_flags, - name : 'MMX Intrinsic Support') - have_mmx = true - endif -endif - -if have_mmx - # Inline assembly do not work on X64 MSVC, so we use - # compatibility intrinsics there - if cc.get_id() != 'msvc' or host_machine.cpu_family() != 'x86_64' - config.set10('USE_X86_MMX', true) - endif -elif use_mmx.enabled() - error('MMX Support unavailable, but required') -endif - -use_sse2 = get_option('sse2') -have_sse2 = false -sse2_flags = [] -if cc.get_id() == 'sun' - sse2_flags = ['-xarch=sse2'] -elif cc.get_id() != 'msvc' - sse2_flags = ['-msse2', '-Winline'] -endif -if not use_sse2.disabled() - if host_machine.cpu_family() == 'x86' - if cc.compiles(''' - #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)) - # if !defined(__amd64__) && !defined(__x86_64__) - # error "Need GCC >= 4.2 for SSE2 intrinsics on x86" - # endif - #endif - #include - #include - #include - int param; - int main () { - __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c; - c = _mm_xor_si128 (a, b); - return _mm_cvtsi128_si32(c); - }''', - args : sse2_flags, - name : 'SSE2 Intrinsic Support') - have_sse2 = true - endif - elif host_machine.cpu_family() == 'x86_64' - have_sse2 = true - endif -endif - -if have_sse2 - config.set10('USE_SSE2', true) -elif use_sse2.enabled() - error('sse2 Support unavailable, but required') -endif - -use_ssse3 = get_option('ssse3') -have_ssse3 = false -ssse3_flags = [] -if cc.get_id() != 'msvc' - ssse3_flags = ['-mssse3', '-Winline'] -endif - -# x64 pre-2010 MSVC compilers crashes when building the ssse3 code -if not use_ssse3.disabled() and not (cc.get_id() == 'msvc' and cc.version().version_compare('<16') and host_machine.cpu_family() == 'x86_64') - if host_machine.cpu_family().startswith('x86') - if cc.compiles(''' - #include - #include - #include - int param; - int main () { - __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c; - c = _mm_xor_si128 (a, b); - return _mm_cvtsi128_si32(c); - }''', - args : ssse3_flags, - name : 'SSSE3 Intrinsic Support') - have_ssse3 = true - endif - endif -endif - -if have_ssse3 - config.set10('USE_SSSE3', true) -elif use_ssse3.enabled() - error('ssse3 Support unavailable, but required') -endif - -use_vmx = get_option('vmx') -have_vmx = false -vmx_flags = ['-maltivec', '-mabi=altivec'] -if not use_vmx.disabled() - if host_machine.cpu_family().startswith('ppc') - if cc.compiles(''' - #include - int main () { - vector unsigned int v = vec_splat_u32 (1); - v = vec_sub (v, v); - return 0; - }''', - args : vmx_flags, - name : 'VMX/Altivec Intrinsic Support') - have_vmx = true - endif - endif -endif - -if have_vmx - config.set10('USE_VMX', true) -elif use_vmx.enabled() - error('vmx Support unavailable, but required') -endif - -use_armv6_simd = get_option('arm-simd') -have_armv6_simd = false -if not use_armv6_simd.disabled() - if host_machine.cpu_family() == 'arm' - if cc.compiles(files('arm-simd-test.S'), name : 'ARMv6 SIMD Intrinsic Support') - have_armv6_simd = true - endif - endif -endif - -if have_armv6_simd - config.set10('USE_ARM_SIMD', true) -elif use_armv6_simd.enabled() - error('ARMv6 SIMD Support unavailable, but required') -endif - -use_neon = get_option('neon') -have_neon = false -if not use_neon.disabled() - if host_machine.cpu_family() == 'arm' - if cc.compiles(files('neon-test.S'), name : 'NEON Intrinsic Support') - have_neon = true - endif - endif -endif - -if have_neon - config.set10('USE_ARM_NEON', true) -elif use_neon.enabled() - error('NEON Support unavailable, but required') -endif - -use_a64neon = get_option('a64-neon') -have_a64neon = false -if not use_a64neon.disabled() - if host_machine.cpu_family() == 'aarch64' - if cc.compiles(files('a64-neon-test.S'), name : 'NEON A64 Intrinsic Support') - have_a64neon = true - endif - endif -endif - -if have_a64neon - config.set10('USE_ARM_A64_NEON', true) -elif use_a64neon.enabled() - error('A64 NEON Support unavailable, but required') -endif - -use_iwmmxt = get_option('iwmmxt') -have_iwmmxt = false -iwmmxt_flags = ['-flax-vector-conversions', '-Winline'] -if not use_iwmmxt.disabled() - if get_option('iwmmxt2') - iwmmxt_flags += '-march=iwmmxt2' - else - iwmmxt_flags += '-march=iwmmxt' - endif - - if host_machine.cpu_family() == 'arm' - if cc.compiles(''' - #ifndef __IWMMXT__ - #error "IWMMXT not enabled (with -march=iwmmxt)" - #endif - #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)) - #error "Need GCC >= 4.8 for IWMMXT intrinsics" - #endif - #include - int main () { - union { - __m64 v; - char c[8]; - } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; - int b = 4; - __m64 c = _mm_srli_si64 (a.v, b); - } - ''', - args : iwmmxt_flags, - name : 'IWMMXT Intrinsic Support') - have_iwmmxt = true - endif - endif -endif - -if have_iwmmxt - config.set10('USE_ARM_IWMMXT', true) -elif use_iwmmxt.enabled() - error('IWMMXT Support unavailable, but required') -endif - -use_mips_dspr2 = get_option('mips-dspr2') -have_mips_dspr2 = false -mips_dspr2_flags = ['-mdspr2'] -if not use_mips_dspr2.disabled() - if host_machine.cpu_family() == 'mips32' - if cc.compiles(''' - #if !(defined(__mips__) && __mips_isa_rev >= 2) - #error MIPS DSPr2 is currently only available on MIPS32r2 platforms. - #endif - int - main () - { - int c = 0, a = 0, b = 0; - __asm__ __volatile__ ( - "precr.qb.ph %[c], %[a], %[b] \n\t" - : [c] "=r" (c) - : [a] "r" (a), [b] "r" (b) - ); - return c; - }''', - args : mipds_dspr2_flags, - name : 'DSPr2 Intrinsic Support') - have_mips_dspr2 = true - endif - endif -endif - -if have_mips_dspr2 - config.set10('USE_MIPS_DSPR2', true) -elif use_mips_dspr2.enabled() - error('MIPS DSPr2 Support unavailable, but required') -endif - -use_gnu_asm = get_option('gnu-inline-asm') -if not use_gnu_asm.disabled() - if cc.compiles(''' - int main () { - /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */ - asm volatile ( "\tnop\n" : : : "cc", "memory" ); - return 0; - } - ''', - name : 'GNU Inline ASM support.') - config.set10('USE_GCC_INLINE_ASM', true) - elif use_gnu_asm.enabled() - error('GNU inline assembly support missing but required.') - endif -endif - -if get_option('timers') - config.set('PIXMAN_TIMERS', 1) -endif -if get_option('gnuplot') - config.set('PIXMAN_GNUPLOT', 1) -endif - -if cc.get_id() != 'msvc' - dep_openmp = dependency('openmp', required : get_option('openmp')) - if dep_openmp.found() - config.set10('USE_OPENMP', true) - elif meson.version().version_compare('<0.51.0') - # In versions of meson before 0.51 the openmp dependency can still - # inject arguments in the the auto case when it is not found, the - # detection does work correctly in that case however, so we just - # replace dep_openmp with null_dep to work around this. - dep_openmp = null_dep - endif -else - # the MSVC implementation of openmp is not compliant enough for our - # uses here, so we disable it here. - # Please see: https://stackoverflow.com/questions/12560243/using-threadprivate-directive-in-visual-studio - dep_openmp = null_dep -endif - -dep_gtk = dependency('gtk+-3.0', required : get_option('gtk'), required: get_option('demos')) -dep_glib = dependency('glib-2.0', required : get_option('gtk'), required: get_option('demos')) - -dep_png = null_dep -if not get_option('libpng').disabled() - dep_png = dependency('libpng', required : false) - - # We need to look for the right library to link to for libpng, - # when looking for libpng manually - foreach png_ver : [ '16', '15', '14', '13', '12', '10' ] - if not dep_png.found() - dep_png = cc.find_library('libpng@0@'.format(png_ver), has_headers : ['png.h'], required : false) - endif - endforeach - - if get_option('libpng').enabled() and not dep_png.found() - error('libpng support requested but libpng library not found') - endif -endif - -if dep_png.found() - config.set('HAVE_LIBPNG', 1) -endif -dep_m = cc.find_library('m', required : false) -dep_threads = dependency('threads') - -# MSVC-style compilers do not come with pthreads, so we must link -# to it explicitly, currently pthreads-win32 is supported -pthreads_found = false - -if dep_threads.found() and cc.has_header('pthread.h') - if cc.get_argument_syntax() == 'msvc' - pthread_lib = null_dep - foreach pthread_type : ['VC3', 'VSE3', 'VCE3', 'VC2', 'VSE2', 'VCE2'] - if not pthread_lib.found() - pthread_lib = cc.find_library('pthread@0@'.format(pthread_type), required : false) - endif - endforeach - if pthread_lib.found() - pthreads_found = true - dep_threads = pthread_lib - endif - else - pthreads_found = true - endif -endif - -if pthreads_found - config.set('HAVE_PTHREADS', 1) -endif - -funcs = ['sigaction', 'alarm', 'mprotect', 'getpagesize', 'mmap', 'getisax', 'gettimeofday'] -# mingw claimes to have posix_memalign, but it doesn't -if host_machine.system() != 'windows' - funcs += 'posix_memalign' -endif - -foreach f : funcs - if cc.has_function(f) - config.set('HAVE_@0@'.format(f.to_upper()), 1) - endif -endforeach - -# This is only used in one test, that defines _GNU_SOURCE -if cc.has_function('feenableexcept', - prefix : '#define _GNU_SOURCE\n#include ', - dependencies : dep_m) - config.set('HAVE_FEENABLEEXCEPT', 1) -endif - -if cc.has_header_symbol('fenv.h', 'FE_DIVBYZERO') - config.set('HAVE_FEDIVBYZERO', 1) -endif - -foreach h : ['sys/mman.h', 'fenv.h', 'unistd.h'] - if cc.check_header(h) - config.set('HAVE_@0@'.format(h.underscorify().to_upper()), 1) - endif -endforeach - -use_tls = get_option('tls') -have_tls = '' -if not use_tls.disabled() - # gcc on Windows only warns that __declspec(thread) isn't supported, - # passing -Werror=attributes makes it fail. - if (host_machine.system() == 'windows' and - cc.compiles('int __declspec(thread) foo;', - args : cc.get_supported_arguments(['-Werror=attributes']), - name : 'TLS via __declspec(thread)')) - have_tls = '__declspec(thread)' - elif cc.compiles('int __thread foo;', name : 'TLS via __thread') - have_tls = '__thread' - endif -endif - -if have_tls != '' - config.set('TLS', have_tls) -elif use_tls.enabled() - error('Compiler TLS Support unavailable, but required') -endif - -if cc.links(''' - static int x = 1; - static void __attribute__((constructor)) constructor_function () { x = 0; } - int main (void) { return x; } - ''', - name : '__attribute__((constructor))') - config.set('TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR', 1) -endif - -if cc.links( - ' __float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }', - name : 'Has float128 support') - config.set('HAVE_FLOAT128', 1) -endif - -if cc.has_function('clz') - config.set('HAVE_BUILTIN_CLZ', 1) -endif - -if cc.links(''' - unsigned int __attribute__ ((vector_size(16))) e, a, b; - int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; } - ''', - name : 'Support for GCC vector extensions') - config.set('HAVE_GCC_VECTOR_EXTENSIONS', 1) -endif - -if host_machine.endian() == 'big' - config.set('WORDS_BIGENDIAN', 1) -endif - -config.set('SIZEOF_LONG', cc.sizeof('long')) - -# Required to make pixman-private.h -config.set('PACKAGE', 'foo') - -version_conf = configuration_data() -split = meson.project_version().split('.') -version_conf.set('PIXMAN_VERSION_MAJOR', split[0]) -version_conf.set('PIXMAN_VERSION_MINOR', split[1]) -version_conf.set('PIXMAN_VERSION_MICRO', split[2]) - -add_project_arguments('-DHAVE_CONFIG_H', language : ['c']) - -subdir('pixman') - -if not get_option('tests').disabled() or not get_option('demos').disabled() - subdir(join_paths('test', 'utils')) -endif - -if not get_option('demos').disabled() - subdir('demos') -endif - -if not get_option('tests').disabled() - subdir('test') -endif - -pkg = import('pkgconfig') -pkg.generate(libpixman, - name : 'Pixman', - filebase : 'pixman-1', - description : 'The pixman library (version 1)', - subdirs: 'pixman-1', - version : meson.project_version(), -) diff --git a/vendor/pixman/meson_options.txt b/vendor/pixman/meson_options.txt deleted file mode 100644 index df10889c0..000000000 --- a/vendor/pixman/meson_options.txt +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright © 2018 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -option( - 'loongson-mmi', - type : 'feature', - description : 'Use Loongson MMI intrinsic optimized paths', -) -option( - 'mmx', - type : 'feature', - description : 'Use X86 MMX intrinsic optimized paths', -) -option( - 'sse2', - type : 'feature', - description : 'Use X86 SSE2 intrinsic optimized paths', -) -option( - 'ssse3', - type : 'feature', - description : 'Use X86 SSSE3 intrinsic optimized paths', -) -option( - 'vmx', - type : 'feature', - description : 'Use PPC VMX/Altivec intrinsic optimized paths', -) -option( - 'arm-simd', - type : 'feature', - description : 'Use ARMv6 SIMD intrinsic optimized paths', -) -option( - 'neon', - type : 'feature', - description : 'Use ARM NEON intrinsic optimized paths', -) -option( - 'a64-neon', - type : 'feature', - description : 'Use ARM A64 NEON intrinsic optimized paths', -) -option( - 'iwmmxt', - type : 'feature', - description : 'Use ARM IWMMXT intrinsic optimized paths', -) -option( - 'iwmmxt2', - type : 'boolean', - value : true, - description : 'Use ARM IWMMXT2 intrinsic instead of IWMMXT', -) -option( - 'mips-dspr2', - type : 'feature', - description : 'Use MIPS32 DSPr2 intrinsic optimized paths', -) -option( - 'gnu-inline-asm', - type : 'feature', - description : 'Use GNU style inline assembler', -) -option( - 'tls', - type : 'feature', - description : 'Use compiler support for thread-local storage', -) -option( - 'cpu-features-path', - type : 'string', - description : 'Path to platform-specific cpu-features.[ch] for systems that do not provide it (e.g. Android)', -) -option( - 'openmp', - type : 'feature', - description : 'Enable OpenMP for tests', -) -option( - 'timers', - type : 'boolean', - value : false, - description : 'Enable TIMER_* macros', -) -option( - 'gnuplot', - type : 'boolean', - value : false, - description : 'Enable output of filters that can be piped to gnuplot', -) -option( - 'gtk', - type : 'feature', - description : 'Enable demos using GTK', -) -option( - 'libpng', - type : 'feature', - description : 'Use libpng in tests' -) -option( - 'tests', - type : 'feature', - description : 'Build tests' -) -option( - 'demos', - type : 'feature', - description : 'Build demos' -) diff --git a/vendor/pixman/neon-test.S b/vendor/pixman/neon-test.S deleted file mode 100644 index c30a3990b..000000000 --- a/vendor/pixman/neon-test.S +++ /dev/null @@ -1,12 +0,0 @@ -.text -.fpu neon -.arch armv7a -.object_arch armv4 -.eabi_attribute 10, 0 -.arm -.altmacro -#ifndef __ARM_EABI__ -#error EABI is required (to be sure that calling conventions are compatible) -#endif -pld [r0] -vmovn.u16 d0, q0 diff --git a/vendor/pixman/pixman-1-uninstalled.pc.in b/vendor/pixman/pixman-1-uninstalled.pc.in deleted file mode 100644 index e0347d010..000000000 --- a/vendor/pixman/pixman-1-uninstalled.pc.in +++ /dev/null @@ -1,5 +0,0 @@ -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman -Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la diff --git a/vendor/pixman/pixman-1.pc.in b/vendor/pixman/pixman-1.pc.in deleted file mode 100644 index e3b9711ae..000000000 --- a/vendor/pixman/pixman-1.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${includedir}/pixman-1 -Libs: -L${libdir} -lpixman-1 - diff --git a/vendor/pixman/pixman/Makefile.am b/vendor/pixman/pixman/Makefile.am deleted file mode 100644 index f05e2adc5..000000000 --- a/vendor/pixman/pixman/Makefile.am +++ /dev/null @@ -1,158 +0,0 @@ -include $(top_srcdir)/pixman/Makefile.sources - -lib_LTLIBRARIES = libpixman-1.la - -libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@ -libpixman_1_la_LIBADD = @PTHREAD_LIBS@ -lm -libpixman_1_la_SOURCES = $(libpixman_sources) $(libpixman_headers) - -libpixmanincludedir = $(includedir)/pixman-1 -libpixmaninclude_HEADERS = pixman.h pixman-version.h -noinst_LTLIBRARIES = - -EXTRA_DIST = \ - Makefile.win32 \ - dither/make-blue-noise.c \ - pixman-region.c \ - solaris-hwcap.mapfile \ - meson.build \ - $(NULL) - -# mmx code -if USE_X86_MMX -noinst_LTLIBRARIES += libpixman-mmx.la -libpixman_mmx_la_SOURCES = \ - pixman-mmx.c -libpixman_mmx_la_CFLAGS = $(MMX_CFLAGS) -libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-mmx.la - -ASM_CFLAGS_mmx=$(MMX_CFLAGS) -endif - -# vmx code -if USE_VMX -noinst_LTLIBRARIES += libpixman-vmx.la -libpixman_vmx_la_SOURCES = \ - pixman-vmx.c \ - pixman-combine32.h -libpixman_vmx_la_CFLAGS = $(VMX_CFLAGS) -libpixman_1_la_LIBADD += libpixman-vmx.la - -ASM_CFLAGS_vmx=$(VMX_CFLAGS) -endif - -# sse2 code -if USE_SSE2 -noinst_LTLIBRARIES += libpixman-sse2.la -libpixman_sse2_la_SOURCES = \ - pixman-sse2.c -libpixman_sse2_la_CFLAGS = $(SSE2_CFLAGS) -libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-sse2.la - -ASM_CFLAGS_sse2=$(SSE2_CFLAGS) -endif - -# ssse3 code -if USE_SSSE3 -noinst_LTLIBRARIES += libpixman-ssse3.la -libpixman_ssse3_la_SOURCES = \ - pixman-ssse3.c -libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS) -libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-ssse3.la - -ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) -endif - -# arm simd code -if USE_ARM_SIMD -noinst_LTLIBRARIES += libpixman-arm-simd.la -libpixman_arm_simd_la_SOURCES = \ - pixman-arm-simd.c \ - pixman-arm-common.h \ - pixman-arm-simd-asm.S \ - pixman-arm-simd-asm-scaled.S \ - pixman-arm-asm.h \ - pixman-arm-simd-asm.h -libpixman_1_la_LIBADD += libpixman-arm-simd.la - -ASM_CFLAGS_arm_simd= -endif - -# arm neon code -if USE_ARM_NEON -noinst_LTLIBRARIES += libpixman-arm-neon.la -libpixman_arm_neon_la_SOURCES = \ - pixman-arm-neon.c \ - pixman-arm-common.h \ - pixman-arm-neon-asm.S \ - pixman-arm-neon-asm-bilinear.S \ - pixman-arm-asm.h \ - pixman-arm-neon-asm.h -libpixman_1_la_LIBADD += libpixman-arm-neon.la - -ASM_CFLAGS_arm_neon= -endif - -# arm a64 neon code -if USE_ARM_A64_NEON -noinst_LTLIBRARIES += libpixman-arma64-neon.la -libpixman_arma64_neon_la_SOURCES = \ - pixman-arm-neon.c \ - pixman-arm-common.h \ - pixman-arma64-neon-asm.S \ - pixman-arma64-neon-asm-bilinear.S \ - pixman-arm-asm.h \ - pixman-arma64-neon-asm.h -libpixman_1_la_LIBADD += libpixman-arma64-neon.la - -ASM_CFLAGS_arm_neon= -endif - -# iwmmxt code -if USE_ARM_IWMMXT -libpixman_iwmmxt_la_SOURCES = pixman-mmx.c -noinst_LTLIBRARIES += libpixman-iwmmxt.la -libpixman_1_la_LIBADD += libpixman-iwmmxt.la - -libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c - $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c - $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo - -libpixman_iwmmxt_la_DEPENDENCIES = $(am__DEPENDENCIES_1) -libpixman_iwmmxt_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ - $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ - $(CFLAGS) $(IWMMXT_CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ - -libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEPENDENCIES) - $(AM_V_CCLD)$(libpixman_iwmmxt_la_LINK) libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_LIBADD) $(LIBS) -endif - -# mips dspr2 code -if USE_MIPS_DSPR2 -noinst_LTLIBRARIES += libpixman-mips-dspr2.la -libpixman_mips_dspr2_la_SOURCES = \ - pixman-mips-dspr2.c \ - pixman-mips-dspr2.h \ - pixman-mips-dspr2-asm.S \ - pixman-mips-dspr2-asm.h \ - pixman-mips-memcpy-asm.S -libpixman_1_la_LIBADD += libpixman-mips-dspr2.la - -ASM_CFLAGS_mips_dspr2= -endif - -# loongson code -if USE_LOONGSON_MMI -noinst_LTLIBRARIES += libpixman-loongson-mmi.la -libpixman_loongson_mmi_la_SOURCES = pixman-mmx.c loongson-mmintrin.h -libpixman_loongson_mmi_la_CFLAGS = $(LS_CFLAGS) -libpixman_1_la_LDFLAGS += $(LS_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-loongson-mmi.la -endif - -.c.s : $(libpixmaninclude_HEADERS) - $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/vendor/pixman/pixman/Makefile.sources b/vendor/pixman/pixman/Makefile.sources deleted file mode 100644 index 23d1d974d..000000000 --- a/vendor/pixman/pixman/Makefile.sources +++ /dev/null @@ -1,43 +0,0 @@ -libpixman_sources = \ - pixman.c \ - pixman-access.c \ - pixman-access-accessors.c \ - pixman-bits-image.c \ - pixman-combine32.c \ - pixman-combine-float.c \ - pixman-conical-gradient.c \ - pixman-filter.c \ - pixman-x86.c \ - pixman-mips.c \ - pixman-arm.c \ - pixman-ppc.c \ - pixman-edge.c \ - pixman-edge-accessors.c \ - pixman-fast-path.c \ - pixman-glyph.c \ - pixman-general.c \ - pixman-gradient-walker.c \ - pixman-image.c \ - pixman-implementation.c \ - pixman-linear-gradient.c \ - pixman-matrix.c \ - pixman-noop.c \ - pixman-radial-gradient.c \ - pixman-region16.c \ - pixman-region32.c \ - pixman-solid-fill.c \ - pixman-timer.c \ - pixman-trap.c \ - pixman-utils.c \ - $(NULL) - -libpixman_headers = \ - dither/blue-noise-64x64.h \ - pixman.h \ - pixman-accessor.h \ - pixman-combine32.h \ - pixman-compiler.h \ - pixman-edge-imp.h \ - pixman-inlines.h \ - pixman-private.h \ - $(NULL) diff --git a/vendor/pixman/pixman/Makefile.win32 b/vendor/pixman/pixman/Makefile.win32 deleted file mode 100644 index 7b64033bc..000000000 --- a/vendor/pixman/pixman/Makefile.win32 +++ /dev/null @@ -1,93 +0,0 @@ -default: all - -top_srcdir = .. -include $(top_srcdir)/pixman/Makefile.sources -include $(top_srcdir)/Makefile.win32.common - -MMX_VAR = $(MMX) -ifeq ($(MMX_VAR),) -MMX_VAR=on -endif - -SSE2_VAR = $(SSE2) -ifeq ($(SSE2_VAR),) -SSE2_VAR=on -endif - -SSSE3_VAR = $(SSSE3) -ifeq ($(SSSE3_VAR),) -SSSE3_VAR=on -endif - -MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714 -SSE2_CFLAGS = -DUSE_SSE2 -SSSE3_CFLAGS = -DUSE_SSSE3 - -# MMX compilation flags -ifeq ($(MMX_VAR),on) -PIXMAN_CFLAGS += $(MMX_CFLAGS) -libpixman_sources += pixman-mmx.c -endif - -# SSE2 compilation flags -ifeq ($(SSE2_VAR),on) -PIXMAN_CFLAGS += $(SSE2_CFLAGS) -libpixman_sources += pixman-sse2.c -endif - -# SSSE3 compilation flags -ifeq ($(SSSE3_VAR),on) -PIXMAN_CFLAGS += $(SSSE3_CFLAGS) -libpixman_sources += pixman-ssse3.c -endif - -OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(libpixman_sources)) - -# targets -all: inform informMMX informSSE2 informSSSE3 $(CFG_VAR)/$(LIBRARY).lib - -informMMX: -ifneq ($(MMX),off) -ifneq ($(MMX),on) -ifneq ($(MMX),) - @echo "Invalid specified MMX option : "$(MMX_VAR)"." - @echo - @echo "Possible choices for MMX are 'on' or 'off'" - @exit 1 -endif - @echo "Setting MMX flag to default value 'on'... (use MMX=on or MMX=off)" -endif -endif - -informSSE2: -ifneq ($(SSE2),off) -ifneq ($(SSE2),on) -ifneq ($(SSE2),) - @echo "Invalid specified SSE option : "$(SSE2)"." - @echo - @echo "Possible choices for SSE2 are 'on' or 'off'" - @exit 1 -endif - @echo "Setting SSE2 flag to default value 'on'... (use SSE2=on or SSE2=off)" -endif -endif - -informSSSE3: -ifneq ($(SSSE3),off) -ifneq ($(SSSE3),on) -ifneq ($(SSSE3),) - @echo "Invalid specified SSE option : "$(SSSE3)"." - @echo - @echo "Possible choices for SSSE3 are 'on' or 'off'" - @exit 1 -endif - @echo "Setting SSSE3 flag to default value 'on'... (use SSSE3=on or SSSE3=off)" -endif -endif - - -# pixman linking -$(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS) - @$(AR) $(PIXMAN_ARFLAGS) -OUT:$@ $^ - -.PHONY: all informMMX informSSE2 informSSSE3 diff --git a/vendor/pixman/pixman/dither/blue-noise-64x64.h b/vendor/pixman/pixman/dither/blue-noise-64x64.h deleted file mode 100644 index 93c8805b5..000000000 --- a/vendor/pixman/pixman/dither/blue-noise-64x64.h +++ /dev/null @@ -1,77 +0,0 @@ -/* WARNING: This file is generated by make-blue-noise.c - * Please edit that file instead of this one. - */ - -#ifndef BLUE_NOISE_64X64_H -#define BLUE_NOISE_64X64_H - -#include - -static const uint16_t dither_blue_noise_64x64[4096] = { - 3039, 1368, 3169, 103, 2211, 1248, 2981, 668, 2633, 37, 3963, 2903, 384, 2564, 3115, 1973, 3348, 830, 2505, 1293, 3054, 1060, 1505, 3268, 400, 1341, 593, 3802, 3384, 429, 4082, 1411, 2503, 3863, 126, 1292, 1887, 2855, 205, 2094, 2977, 1899, 3924, 356, 3088, 2500, 3942, 1409, 2293, 1734, 3732, 1291, 3227, 277, 2054, 786, 2871, 411, 2425, 1678, 3986, 455, 2879, 2288, - 388, 1972, 3851, 778, 2768, 3697, 944, 2123, 1501, 3533, 937, 1713, 1381, 3888, 156, 1242, 516, 2888, 1607, 3676, 632, 2397, 3804, 2673, 1898, 3534, 2593, 1777, 1170, 2299, 3013, 1838, 523, 3053, 1647, 3601, 3197, 959, 1520, 3633, 893, 2437, 3367, 2187, 1258, 137, 1965, 401, 3546, 643, 3087, 2498, 733, 2786, 3371, 4053, 1266, 1977, 3663, 183, 2570, 2107, 1183, 3708, - 907, 2473, 1151, 3363, 1527, 1902, 232, 3903, 3060, 496, 2486, 3206, 2165, 861, 2387, 3653, 2101, 3972, 132, 2162, 3437, 1827, 215, 895, 3114, 271, 969, 2932, 197, 1598, 878, 3696, 1140, 2120, 904, 2431, 302, 3846, 2675, 481, 3187, 66, 1440, 650, 3833, 2826, 3435, 901, 2936, 2111, 250, 1875, 3609, 1174, 1747, 162, 2346, 3420, 913, 3172, 1383, 752, 3298, 1735, - 3540, 2938, 249, 2324, 526, 3099, 2561, 1324, 2347, 1861, 1200, 3702, 257, 3442, 1514, 2999, 992, 1766, 2735, 1163, 478, 2943, 1279, 3635, 2177, 1464, 3672, 2386, 3871, 3340, 2690, 64, 3489, 2811, 3999, 633, 1948, 1243, 2269, 1807, 1143, 2750, 3729, 1790, 2363, 1053, 1537, 2636, 4065, 1076, 1476, 3869, 450, 2200, 2676, 658, 2979, 1548, 544, 1913, 2838, 3911, 116, 2698, - 517, 1295, 3997, 1739, 3665, 1083, 3509, 599, 3400, 118, 2956, 720, 2689, 1907, 567, 2523, 284, 3397, 711, 3219, 2450, 3985, 1665, 2549, 562, 3011, 1855, 729, 1355, 528, 1908, 2456, 1384, 337, 1540, 2654, 3138, 3513, 703, 4080, 3314, 2047, 855, 3037, 209, 3317, 577, 1828, 17, 2336, 3193, 2748, 962, 3441, 1450, 3246, 1075, 3878, 2615, 3497, 1033, 2310, 1442, 2183, - 1654, 3254, 2061, 738, 2832, 148, 2030, 1670, 909, 3850, 2109, 1533, 4046, 1085, 3098, 3897, 1378, 2248, 3829, 1495, 1966, 23, 797, 3427, 1124, 4057, 95, 2787, 2190, 3074, 3950, 742, 3194, 1999, 3386, 1113, 16, 1657, 2804, 201, 1543, 383, 2559, 1325, 3604, 2068, 2493, 3771, 1284, 3460, 710, 1716, 2447, 80, 3811, 2032, 347, 2227, 15, 1689, 397, 3084, 662, 3798, - 973, 43, 2608, 3143, 1459, 2423, 4066, 2770, 3191, 1283, 2630, 314, 3235, 2289, 72, 1822, 2840, 924, 350, 2653, 1057, 3715, 2235, 2775, 346, 2083, 1553, 3292, 1081, 274, 1686, 1188, 2327, 3743, 578, 2234, 3916, 2519, 1011, 3056, 2207, 3438, 3890, 537, 1617, 837, 3094, 373, 2795, 1980, 276, 3951, 1353, 3015, 844, 1724, 3651, 2923, 1316, 4092, 2504, 3627, 1936, 2854, - 2461, 3929, 1193, 421, 3746, 820, 1180, 286, 2261, 532, 3625, 1812, 802, 1327, 3527, 670, 3730, 2025, 3124, 3565, 529, 2960, 1769, 1390, 3196, 2494, 3756, 796, 3618, 2602, 3463, 2847, 166, 953, 1745, 2900, 438, 2070, 1418, 3741, 639, 1205, 1891, 2882, 2282, 4012, 1182, 1696, 3630, 951, 2904, 2170, 3530, 375, 2320, 2742, 1132, 701, 3216, 2023, 847, 1230, 310, 3431, - 770, 1961, 3531, 1702, 2181, 3370, 1877, 3072, 1571, 3389, 1071, 2415, 3782, 2803, 1610, 2454, 1211, 182, 1655, 2322, 1282, 3372, 287, 3935, 704, 1232, 415, 1910, 2286, 1399, 556, 1964, 4068, 2444, 3605, 1272, 3345, 816, 3526, 256, 2402, 2777, 955, 345, 3289, 111, 2727, 635, 2396, 1488, 3331, 600, 1032, 1575, 4026, 515, 3507, 2433, 1605, 460, 3364, 2783, 1810, 1397, - 2334, 223, 2945, 688, 2533, 99, 2705, 624, 3944, 2073, 46, 2978, 508, 2132, 269, 3173, 3453, 2631, 4076, 694, 1892, 2586, 972, 2178, 3470, 1695, 2849, 3141, 77, 3884, 994, 3029, 1536, 673, 3083, 124, 2583, 1722, 2821, 1944, 4027, 1661, 3176, 3728, 1337, 1813, 3503, 2035, 3930, 157, 2537, 1865, 3096, 2646, 1941, 3252, 1449, 135, 2836, 3758, 2139, 84, 3678, 3106, - 3862, 1545, 3307, 1320, 3955, 1031, 3664, 1306, 2460, 776, 1487, 3294, 1187, 3990, 1903, 1021, 549, 1484, 943, 3027, 97, 3853, 1499, 2880, 198, 2575, 3995, 1089, 1587, 2475, 3282, 339, 2657, 1158, 2105, 1493, 3943, 580, 3232, 1287, 846, 48, 2480, 2112, 771, 2534, 459, 3134, 850, 1298, 3790, 325, 3652, 1249, 193, 940, 2202, 3895, 1829, 911, 1366, 2577, 1069, 534, - 2104, 1009, 2667, 392, 1983, 2917, 1645, 324, 3439, 2869, 3705, 1767, 2592, 756, 2916, 3683, 2276, 2850, 2053, 3594, 2403, 3181, 634, 3699, 1933, 906, 519, 2150, 3673, 764, 1770, 2220, 3795, 3336, 502, 3547, 2339, 1110, 301, 2210, 3354, 3643, 569, 1518, 2940, 3973, 1138, 1613, 2773, 2127, 2983, 1671, 769, 2161, 3800, 2730, 3127, 1179, 533, 3259, 2284, 4014, 1651, 2820, - 3566, 653, 1839, 3455, 2399, 789, 3149, 2244, 1863, 1099, 474, 2307, 158, 3541, 1312, 1711, 0, 3902, 360, 1629, 1091, 395, 1781, 1191, 2374, 3353, 1419, 3225, 206, 2931, 3553, 1046, 54, 1646, 2470, 910, 1860, 3137, 3770, 2635, 1562, 2809, 1215, 3788, 222, 2199, 3335, 67, 3606, 524, 1001, 3309, 2410, 3473, 591, 1619, 291, 2502, 3629, 2891, 335, 741, 3378, 168, - 2384, 3129, 4051, 22, 1444, 3613, 543, 3893, 186, 2665, 4062, 933, 3058, 2142, 449, 2711, 3224, 849, 1330, 3349, 2195, 2670, 3484, 2993, 32, 3774, 2722, 1859, 2548, 1268, 583, 2027, 3165, 2807, 4029, 227, 2897, 1434, 721, 1816, 195, 905, 2066, 3258, 1754, 970, 2674, 1880, 2338, 3915, 1485, 2660, 14, 1313, 2914, 2046, 4074, 791, 1917, 1301, 1725, 2687, 2019, 1443, - 418, 1186, 1664, 2859, 1049, 2056, 2741, 1226, 1589, 3186, 2042, 1377, 3449, 1574, 3941, 1063, 1930, 2501, 3751, 2930, 671, 4031, 888, 2081, 1544, 684, 1117, 351, 4052, 1698, 2393, 3881, 1439, 785, 1277, 2013, 3488, 441, 2459, 3980, 3061, 3481, 2543, 419, 3020, 609, 3515, 1350, 799, 2878, 348, 2034, 3966, 1824, 950, 3281, 1394, 2239, 3452, 55, 3922, 3119, 892, 3785, - 3023, 2140, 782, 2492, 3817, 241, 3355, 2424, 856, 3639, 612, 2556, 245, 2858, 705, 2316, 3562, 495, 1748, 128, 1912, 1454, 280, 2552, 3905, 3130, 2274, 3472, 834, 3055, 240, 2692, 471, 2272, 3301, 2632, 1080, 3693, 2136, 1029, 1364, 590, 1611, 4067, 1190, 2360, 3827, 261, 3180, 1768, 3471, 1103, 3003, 520, 3674, 151, 2571, 555, 3033, 982, 2353, 504, 1259, 2555, - 149, 3889, 3380, 493, 3178, 1681, 663, 1924, 2990, 49, 1792, 3861, 1192, 1987, 3273, 297, 1457, 3043, 1177, 2292, 3249, 2829, 3682, 1154, 1758, 428, 2872, 1993, 1500, 3703, 1129, 3421, 1840, 3754, 163, 659, 1733, 3182, 38, 2875, 1957, 3614, 2237, 78, 1873, 2801, 1513, 2121, 1074, 2516, 667, 3710, 1429, 2430, 2088, 2830, 1072, 3557, 1531, 2733, 1955, 3286, 3590, 1826, - 2778, 1068, 1932, 1452, 2279, 1185, 3564, 3952, 1391, 2726, 3313, 2331, 870, 3709, 1674, 2772, 4085, 808, 2596, 3848, 927, 538, 2335, 3334, 773, 3597, 1347, 109, 2663, 608, 2108, 2994, 936, 1524, 2922, 3968, 2422, 1467, 845, 3870, 321, 2704, 1073, 3308, 3680, 823, 430, 3375, 4030, 112, 2171, 2695, 267, 3374, 731, 1627, 3919, 1871, 352, 3839, 1370, 234, 794, 1532, - 3245, 647, 3575, 74, 3045, 2766, 285, 2174, 498, 1059, 1551, 385, 3125, 2598, 143, 1128, 2095, 3395, 318, 1590, 3524, 1345, 1969, 242, 2759, 2092, 947, 3926, 3244, 2356, 1658, 6, 3593, 2554, 1172, 1995, 371, 2755, 3417, 2294, 1570, 3164, 748, 2517, 1401, 3111, 2420, 1662, 2910, 1276, 3276, 854, 1804, 4000, 1253, 2987, 229, 2344, 3184, 649, 2196, 2921, 4095, 2389, - 1289, 2193, 2579, 4023, 757, 1858, 986, 3199, 2514, 3475, 4021, 2154, 651, 1432, 3468, 2404, 574, 1799, 3105, 2145, 86, 2614, 3218, 1565, 4088, 2481, 3079, 1815, 323, 1212, 3837, 759, 2159, 435, 3223, 784, 3659, 1114, 1888, 550, 1221, 3786, 1803, 499, 2117, 185, 3763, 942, 589, 2001, 3838, 1483, 3154, 2256, 468, 2544, 3403, 898, 1208, 2610, 3622, 967, 1929, 378, - 3781, 220, 1656, 1115, 3347, 2428, 3822, 1577, 712, 1959, 110, 2765, 1762, 3854, 979, 2928, 3714, 1371, 746, 3969, 2884, 975, 3779, 641, 1142, 159, 1460, 702, 3485, 2866, 2495, 3330, 1305, 3937, 1635, 2229, 2962, 146, 4055, 3091, 2417, 100, 3508, 2933, 4006, 1167, 1920, 2760, 3552, 2545, 433, 2845, 142, 1056, 1886, 3616, 1435, 2099, 3803, 1749, 27, 1446, 3350, 2843, - 884, 3310, 2948, 2103, 447, 1351, 187, 2895, 3655, 1256, 3036, 932, 3325, 2257, 451, 1915, 40, 2780, 2438, 1112, 1814, 423, 2290, 1905, 2898, 3419, 2306, 3760, 1938, 486, 1019, 1791, 3010, 2628, 203, 3408, 1269, 2507, 1606, 862, 2779, 2078, 952, 1529, 2638, 708, 3332, 1413, 2, 1726, 1156, 3500, 2392, 3791, 3076, 812, 107, 2861, 501, 3050, 3487, 2455, 594, 1731, - 2685, 1498, 680, 3908, 2621, 3529, 1786, 2236, 342, 2569, 1526, 3722, 230, 1290, 3203, 3947, 1609, 3516, 467, 3267, 3685, 1461, 3140, 3569, 367, 1759, 928, 2754, 1332, 2219, 4034, 260, 655, 1984, 978, 3814, 617, 2086, 3525, 279, 3841, 1373, 3361, 319, 2251, 3066, 407, 2382, 3918, 3133, 2168, 762, 1523, 507, 2641, 1677, 4025, 2413, 1584, 793, 2049, 1109, 3962, 2218, - 1194, 3692, 266, 1687, 981, 3103, 740, 3983, 1005, 3434, 570, 2383, 1942, 2718, 676, 2462, 1007, 2089, 1308, 2222, 233, 2568, 829, 1241, 2669, 3987, 514, 3303, 69, 3142, 1603, 3560, 2295, 3288, 1497, 2696, 1764, 2865, 1058, 3271, 1914, 477, 2529, 3927, 1736, 1273, 3752, 2029, 1012, 565, 2798, 4078, 1949, 3305, 1175, 2179, 380, 3366, 1195, 3849, 2637, 416, 2959, 125, - 3396, 2467, 2036, 3234, 2340, 68, 2819, 1436, 2011, 3139, 1704, 4073, 860, 3582, 1468, 2969, 211, 3157, 4056, 866, 2935, 2000, 3923, 31, 2157, 1477, 2429, 1147, 3792, 2557, 774, 2802, 1153, 3747, 464, 3192, 42, 3904, 539, 1474, 2283, 803, 2876, 1061, 75, 3477, 747, 2893, 1538, 3626, 251, 1322, 2506, 189, 2791, 3667, 939, 2991, 1971, 175, 3195, 1416, 3648, 1857, - 3052, 454, 851, 3789, 1271, 1906, 3694, 2484, 406, 2757, 26, 1189, 2909, 296, 2215, 3784, 1864, 637, 2715, 1673, 3445, 581, 1572, 3059, 3469, 761, 2984, 1737, 2058, 440, 1414, 1921, 121, 2527, 894, 2223, 1302, 2377, 3077, 2666, 3759, 3198, 1811, 3661, 2166, 2731, 1883, 359, 3285, 2458, 1805, 3459, 926, 3834, 675, 1893, 1496, 2612, 657, 3523, 1763, 2354, 564, 961, - 1367, 3977, 1588, 2714, 322, 3446, 1088, 625, 3887, 1354, 3535, 2090, 3316, 1760, 1127, 483, 3491, 1421, 2301, 94, 1202, 3740, 2311, 1014, 1878, 3836, 180, 3412, 991, 2868, 3953, 3450, 3081, 1632, 4071, 1882, 3543, 726, 1719, 179, 1171, 364, 1420, 622, 3090, 1490, 946, 4007, 2212, 1102, 619, 2739, 2189, 1669, 2937, 3426, 39, 3940, 2191, 1264, 887, 4091, 2792, 2135, - 4, 2883, 2281, 631, 3044, 1641, 2232, 3243, 1773, 2319, 827, 2591, 629, 3938, 2426, 3222, 2629, 1044, 3879, 3293, 1952, 2749, 275, 2590, 472, 1372, 2496, 660, 3669, 2264, 208, 915, 2167, 561, 2828, 307, 3265, 1104, 3964, 2155, 3425, 1951, 4077, 2391, 283, 3387, 2581, 115, 1415, 3069, 3896, 141, 3158, 1214, 442, 2405, 1349, 3085, 425, 2528, 3002, 312, 1602, 3588, - 1137, 3323, 1963, 1002, 3578, 2521, 127, 925, 2970, 273, 3737, 1573, 167, 2863, 1509, 800, 147, 2059, 2942, 409, 921, 3151, 1451, 3909, 3333, 2844, 2096, 1512, 3136, 1210, 1798, 2709, 1331, 3586, 1034, 1521, 2441, 2926, 488, 2585, 775, 3031, 2693, 879, 3602, 1173, 2028, 3654, 2781, 841, 1975, 1507, 3646, 768, 3991, 2012, 996, 3544, 1666, 3810, 1990, 3360, 753, 2597, - 3736, 304, 1473, 3828, 485, 1334, 4008, 2072, 3495, 1136, 2806, 2004, 3236, 1010, 2130, 3819, 1750, 3567, 644, 2515, 1794, 3636, 698, 2137, 1162, 832, 3761, 326, 2613, 513, 3302, 3820, 357, 3163, 2259, 3733, 101, 1922, 1386, 3587, 1640, 28, 1286, 2141, 1761, 2918, 693, 1639, 457, 3250, 2434, 365, 2599, 1729, 3284, 2643, 306, 2793, 689, 1090, 104, 1309, 2305, 1831, - 2776, 859, 2446, 2915, 1778, 3337, 2677, 614, 1508, 2409, 469, 4033, 1321, 3563, 402, 3131, 2720, 1093, 1569, 4042, 1229, 2277, 216, 3046, 1817, 57, 3006, 1684, 4059, 2016, 795, 2440, 1652, 1960, 610, 2763, 920, 3864, 3110, 1026, 2326, 3762, 3233, 521, 3856, 173, 2457, 3939, 2138, 1262, 3572, 989, 3021, 2238, 119, 1445, 3832, 1809, 2297, 3467, 2700, 3684, 3102, 394, - 4036, 2050, 3256, 89, 2198, 1079, 248, 1845, 3805, 3104, 880, 1779, 2688, 717, 2373, 1375, 262, 2249, 3071, 13, 2813, 3429, 1600, 3984, 2416, 3603, 1299, 2298, 998, 3492, 1393, 2951, 10, 4009, 1247, 3462, 1679, 2204, 414, 2736, 316, 1894, 2816, 1050, 3373, 1462, 3107, 817, 3464, 21, 1835, 4070, 568, 1178, 3718, 875, 3168, 466, 2974, 1458, 2084, 616, 1564, 1018, - 1693, 546, 1244, 3899, 716, 3160, 3608, 2877, 1220, 334, 3443, 2270, 44, 3000, 1843, 3928, 3405, 766, 3686, 2040, 587, 993, 2647, 387, 930, 2753, 630, 3274, 150, 2808, 453, 3638, 1092, 2352, 3030, 239, 2562, 700, 3240, 1257, 4016, 730, 1515, 2203, 2551, 417, 1866, 1123, 2348, 2902, 1550, 2678, 2075, 3238, 1630, 2531, 2115, 1255, 4054, 840, 290, 3874, 2477, 3399, - 2250, 3577, 2817, 1626, 2576, 1356, 2315, 792, 2087, 2618, 1612, 3855, 1263, 3637, 1036, 494, 1535, 2553, 1198, 1715, 3867, 3170, 1359, 1954, 3483, 1539, 2069, 3886, 1772, 2487, 1534, 2045, 3242, 806, 1578, 2018, 3948, 1423, 3596, 2076, 2466, 3424, 139, 3688, 871, 4049, 2852, 3342, 547, 3719, 327, 852, 3505, 207, 2794, 542, 3600, 45, 2411, 3324, 1788, 3012, 1235, 61, - 2655, 917, 253, 1986, 3738, 313, 1706, 4072, 120, 3229, 957, 597, 2024, 3262, 2453, 2857, 2002, 3190, 210, 2784, 2206, 300, 2400, 3766, 553, 3152, 218, 1150, 2988, 883, 3753, 627, 2664, 3831, 437, 3385, 1008, 2957, 60, 1636, 891, 2899, 1776, 3062, 1315, 2026, 194, 1643, 2079, 1296, 3201, 2465, 1379, 1927, 3898, 1125, 1847, 2846, 1552, 1028, 2725, 2169, 787, 3202, - 1441, 3982, 3032, 1052, 3251, 605, 2639, 3073, 1431, 3642, 2329, 2949, 341, 1634, 833, 129, 4020, 916, 3571, 669, 1506, 3411, 821, 2856, 1207, 2337, 2683, 3448, 340, 2214, 3128, 235, 1738, 1288, 2833, 2419, 606, 1884, 2668, 552, 3765, 1176, 399, 2302, 596, 3591, 2634, 767, 3845, 2767, 995, 3967, 491, 3057, 814, 2300, 3422, 691, 3797, 254, 3645, 509, 3478, 1836, - 2119, 475, 2445, 1525, 2175, 3539, 914, 1926, 473, 1157, 1800, 3971, 2701, 3739, 2129, 3486, 1333, 1784, 2366, 2982, 1070, 4089, 1802, 73, 1642, 3958, 835, 1837, 1480, 4043, 1217, 2469, 3416, 2113, 88, 3668, 1240, 3255, 3920, 2355, 3167, 2003, 2645, 3936, 3228, 1592, 1144, 3474, 2394, 79, 1820, 2241, 1594, 3656, 2584, 153, 1448, 3034, 2005, 2511, 1692, 1335, 3913, 217, - 2822, 3391, 745, 3813, 192, 1274, 2941, 3847, 2489, 3440, 744, 161, 1422, 1086, 572, 3004, 2617, 338, 3807, 2031, 236, 2472, 3065, 2098, 3358, 362, 2163, 3574, 497, 2788, 1970, 948, 3885, 685, 3100, 1712, 2228, 292, 1408, 1016, 164, 3537, 1417, 941, 34, 2172, 3001, 358, 1491, 3147, 699, 3356, 258, 1149, 2946, 1787, 3931, 382, 1146, 3291, 818, 2890, 2379, 1096, - 3679, 1328, 1901, 3162, 2747, 1730, 2253, 5, 1556, 2818, 2093, 3166, 2522, 3410, 2287, 1701, 956, 3237, 620, 1596, 3300, 1307, 511, 3701, 1020, 2939, 1362, 2532, 3208, 749, 3641, 160, 1522, 2624, 1095, 4086, 826, 2841, 3583, 2173, 1727, 723, 2925, 1911, 2482, 3726, 863, 1962, 4028, 1111, 2835, 3773, 2449, 2022, 582, 3278, 923, 2619, 2152, 4039, 92, 1934, 3145, 677, - 2530, 53, 2303, 1003, 458, 3989, 739, 3321, 1064, 369, 3556, 877, 1900, 426, 3876, 1, 3617, 2106, 1197, 2805, 3634, 857, 2706, 1504, 2418, 682, 3868, 20, 1139, 1688, 2333, 3311, 2907, 1945, 265, 2385, 3433, 1601, 636, 2620, 3095, 4044, 386, 3382, 1184, 527, 2814, 3414, 2342, 465, 1889, 1343, 874, 3479, 1502, 2233, 3689, 1385, 559, 2745, 1463, 3465, 376, 1718, - 3217, 4045, 1580, 3612, 2525, 1228, 3018, 1958, 3725, 2358, 1361, 3996, 1581, 3063, 1224, 2737, 1475, 2442, 3946, 191, 1796, 2128, 3975, 134, 1916, 3318, 1597, 2071, 3749, 2672, 403, 1278, 602, 3745, 3220, 1374, 445, 2064, 3830, 243, 1252, 2390, 1563, 2724, 3875, 1818, 1346, 165, 1650, 3264, 2680, 117, 2998, 4081, 343, 2799, 9, 3122, 1743, 3724, 1040, 2231, 3842, 1209, - 900, 398, 2851, 697, 1797, 3482, 293, 2679, 1649, 566, 2954, 91, 2697, 714, 2060, 3211, 781, 480, 3040, 1038, 2611, 666, 2989, 3458, 1201, 2796, 548, 2975, 839, 3121, 1850, 4001, 2208, 1631, 790, 2558, 2972, 1148, 3213, 1849, 3624, 971, 2102, 108, 772, 3101, 2589, 3777, 1042, 656, 3907, 2097, 1615, 2540, 805, 1935, 1231, 3494, 2451, 268, 2995, 750, 2682, 2020, - 3024, 1392, 2124, 3279, 106, 2217, 1387, 822, 3214, 3825, 2160, 1000, 2395, 3691, 228, 4038, 1872, 3413, 1608, 2225, 3536, 303, 1653, 886, 2541, 224, 4037, 2252, 1428, 172, 3504, 958, 2848, 113, 3628, 1834, 3979, 19, 2317, 779, 2797, 518, 3174, 3549, 1482, 2266, 444, 2014, 3555, 2439, 1213, 3113, 535, 1135, 3204, 3858, 2309, 931, 623, 2009, 3359, 1566, 140, 3550, - 1808, 3872, 2488, 1152, 3764, 2892, 3960, 2412, 353, 1223, 1825, 3444, 3116, 1717, 1082, 2313, 1280, 2661, 82, 3852, 1389, 3200, 2330, 3812, 2038, 3581, 1728, 1039, 3339, 2427, 586, 2580, 1238, 3328, 2280, 1047, 595, 2662, 1363, 3338, 1620, 3934, 2497, 1881, 1054, 3954, 3215, 864, 2887, 1801, 320, 3519, 2378, 3704, 1753, 424, 2958, 1660, 4005, 2601, 1116, 3912, 2381, 573, - 2740, 200, 828, 1667, 432, 1931, 1035, 1616, 3598, 2640, 728, 264, 1437, 557, 3501, 2966, 372, 3734, 974, 1978, 758, 2719, 1145, 452, 1433, 725, 2681, 408, 3843, 1918, 1547, 3906, 1996, 503, 1456, 3019, 3493, 1700, 3742, 355, 2134, 176, 1311, 615, 2867, 315, 1680, 1314, 8, 3297, 1494, 783, 1950, 83, 2656, 1382, 3561, 138, 2834, 1404, 330, 1904, 3156, 1027, - 1357, 3381, 3041, 3666, 2729, 734, 3415, 177, 3051, 2021, 4079, 2823, 3775, 2186, 2616, 869, 1668, 3148, 2367, 3315, 393, 4075, 1870, 2920, 3343, 2362, 3188, 1303, 2782, 825, 3171, 259, 2905, 3717, 2538, 184, 2074, 838, 2860, 2407, 1024, 3496, 3008, 3706, 1985, 2349, 3623, 2582, 4058, 2184, 2694, 3873, 2964, 990, 3346, 690, 2033, 1066, 2201, 3490, 2971, 718, 3700, 2188, - 4061, 391, 1989, 2325, 1430, 3150, 2125, 2526, 592, 1403, 976, 2351, 1165, 1851, 114, 3921, 2063, 613, 1358, 2785, 1623, 2254, 25, 3542, 1045, 246, 1852, 3554, 87, 2243, 3615, 1169, 727, 1705, 968, 3957, 3185, 1251, 500, 4063, 1751, 2622, 842, 1519, 90, 3393, 819, 490, 1874, 999, 571, 1275, 2271, 1586, 4040, 2448, 3126, 3731, 436, 885, 1708, 2421, 24, 1599, - 889, 2563, 1199, 645, 70, 4013, 1237, 3723, 1694, 3499, 3, 3266, 484, 2997, 3390, 1233, 2842, 3687, 152, 3480, 1084, 3698, 881, 2490, 1542, 3992, 2209, 692, 1690, 3022, 1470, 2625, 2114, 3512, 2359, 381, 2684, 1897, 3368, 1395, 3080, 289, 2065, 3981, 2758, 1141, 3097, 1472, 2870, 3352, 3707, 225, 3159, 505, 1895, 214, 1222, 1774, 2686, 3978, 3275, 1196, 3518, 2825, - 3270, 1720, 3796, 3466, 2650, 1841, 298, 899, 2862, 2091, 2671, 1744, 3735, 801, 1560, 349, 2262, 903, 1833, 2524, 512, 3117, 1793, 2827, 476, 3038, 1216, 2550, 3826, 980, 431, 4048, 35, 2992, 1265, 1595, 765, 3675, 76, 2247, 696, 3456, 1254, 2452, 664, 1757, 2133, 3750, 145, 2332, 1554, 1981, 3580, 2712, 868, 3640, 2919, 638, 2275, 1427, 309, 2595, 2006, 492, - 2226, 178, 2911, 836, 1528, 3028, 2240, 3327, 404, 3970, 707, 1294, 2464, 2131, 4032, 2600, 3319, 1406, 2913, 3974, 2156, 1425, 221, 3877, 2017, 811, 3662, 272, 3287, 1988, 2408, 3357, 1746, 598, 3239, 3823, 2182, 2934, 1078, 2604, 3840, 1697, 2906, 413, 3210, 3880, 331, 2644, 1260, 848, 3042, 2535, 1077, 1438, 3261, 2365, 1561, 3799, 85, 3082, 1876, 674, 3932, 1101, - 3644, 1344, 1943, 2401, 390, 3835, 1048, 2572, 1541, 1133, 3075, 3584, 308, 2889, 1065, 1869, 601, 3783, 282, 1181, 736, 3312, 2368, 1126, 3383, 1675, 2734, 1426, 628, 2873, 1317, 843, 2717, 2048, 1004, 2536, 333, 1782, 3295, 1517, 219, 2153, 815, 3502, 1579, 2268, 987, 3409, 1780, 4018, 354, 665, 3914, 47, 1956, 456, 1006, 2010, 3406, 1130, 3621, 2894, 1549, 3092, - 2485, 640, 3993, 3179, 1270, 3436, 585, 1925, 3757, 2304, 136, 1976, 1486, 646, 3520, 50, 3155, 1637, 2435, 3522, 1937, 2756, 3748, 661, 2224, 58, 3230, 2357, 1830, 3892, 170, 3607, 1447, 3949, 190, 3392, 1336, 584, 4010, 918, 3016, 3670, 1155, 2406, 52, 1304, 3009, 607, 2085, 2699, 3205, 1848, 2291, 3402, 2764, 3865, 3048, 2508, 735, 2710, 443, 2341, 897, 263, - 1785, 2769, 983, 56, 2197, 1685, 2703, 202, 2944, 810, 3377, 2626, 3787, 3047, 2055, 1236, 2752, 2122, 945, 3093, 96, 1624, 439, 3014, 1388, 4015, 977, 448, 3506, 1098, 2242, 3026, 506, 2361, 2952, 1862, 3619, 2790, 1992, 2483, 525, 1868, 2652, 4093, 1998, 3595, 2478, 3816, 122, 1412, 929, 3716, 1166, 1648, 813, 1300, 199, 1489, 3998, 1771, 1310, 3808, 2052, 3423, - 434, 3712, 1625, 3558, 2955, 853, 4019, 1348, 3511, 1732, 1246, 487, 934, 1672, 2510, 3965, 788, 3711, 396, 1369, 4090, 1055, 2603, 1879, 3528, 2518, 2067, 3005, 1516, 2588, 751, 1740, 3418, 1131, 1576, 686, 2296, 1118, 18, 3263, 1365, 3401, 294, 737, 3177, 410, 867, 1633, 2963, 3579, 2375, 252, 2881, 479, 2471, 3576, 2180, 3306, 332, 2255, 3035, 41, 2648, 1396, - 2929, 2230, 1219, 2512, 446, 2008, 3189, 2388, 626, 2164, 2831, 4047, 2376, 174, 3272, 368, 1469, 3226, 2578, 1991, 2874, 2263, 3681, 876, 188, 1239, 683, 3776, 226, 3183, 4083, 2148, 63, 2649, 3859, 299, 3086, 3933, 1585, 2185, 3767, 988, 1707, 2908, 1407, 1844, 2771, 2245, 1161, 560, 1755, 3376, 2051, 4064, 3135, 1832, 652, 2853, 1051, 3649, 760, 3290, 1105, 3945, - 872, 154, 3207, 713, 3780, 1453, 281, 1087, 3695, 30, 3299, 1919, 1400, 3551, 1119, 1890, 2314, 618, 1703, 3428, 724, 295, 3146, 1557, 3341, 2896, 1683, 2723, 1974, 1017, 541, 1380, 3720, 804, 3280, 2082, 997, 2567, 777, 2961, 213, 2707, 2328, 3632, 1025, 3891, 3304, 255, 4003, 3108, 2587, 1323, 743, 1479, 105, 1013, 3901, 1618, 2044, 2627, 1465, 1846, 576, 1994, - 2560, 3521, 1742, 2118, 2800, 3404, 1783, 2609, 2968, 1582, 1022, 412, 2713, 687, 2976, 3857, 2761, 3620, 62, 1108, 3844, 1340, 2100, 540, 2345, 3925, 405, 3457, 1319, 2468, 3362, 2815, 1867, 2372, 1281, 1714, 3690, 482, 3498, 1842, 1285, 3994, 558, 2039, 81, 2499, 678, 1481, 1923, 964, 12, 3824, 2980, 2205, 2762, 3432, 2398, 181, 3247, 462, 4094, 2350, 3589, 3089, - 1555, 1094, 4041, 247, 1267, 908, 3959, 2041, 732, 3860, 2343, 3132, 3769, 2144, 1621, 237, 912, 1329, 3025, 2146, 2642, 1775, 3721, 2746, 1121, 1953, 902, 2285, 130, 3671, 1659, 278, 3153, 522, 2721, 123, 2996, 1466, 2380, 377, 3231, 873, 1510, 3476, 3123, 1250, 2147, 3650, 2839, 3451, 2323, 1122, 3545, 379, 1765, 1218, 603, 3768, 1360, 938, 2885, 133, 1245, 363, - 2364, 554, 2743, 3344, 2474, 530, 3112, 169, 1297, 3430, 536, 1741, 98, 1043, 2574, 3253, 2246, 1854, 4022, 510, 3283, 204, 858, 3398, 36, 3118, 1478, 3794, 2986, 706, 2176, 922, 3559, 1097, 3976, 3322, 2149, 1160, 2810, 3883, 2007, 2513, 2953, 328, 1721, 3793, 422, 2566, 807, 329, 1638, 1967, 648, 2520, 3727, 3109, 2116, 2927, 2491, 1939, 3365, 1709, 2728, 3815, - 2037, 3120, 831, 1405, 1896, 3592, 1622, 2369, 2864, 2151, 1107, 2542, 3532, 1410, 3917, 427, 3568, 709, 2509, 1503, 1037, 2973, 2436, 1604, 4035, 2594, 563, 1819, 2659, 1234, 4004, 2565, 1511, 2273, 1823, 336, 882, 3772, 575, 1628, 171, 3570, 1120, 2260, 2716, 935, 3064, 1806, 1342, 3144, 3900, 2744, 3296, 985, 1546, 238, 896, 1663, 305, 3660, 695, 2213, 960, 3407, - 144, 1795, 3894, 2267, 51, 2708, 1023, 3818, 366, 1821, 4087, 2985, 755, 2057, 2912, 949, 1583, 2774, 231, 3447, 2258, 3866, 1982, 672, 1225, 2077, 3320, 1062, 370, 3241, 1968, 7, 3068, 681, 3631, 2573, 1567, 3175, 2321, 1067, 3070, 722, 1856, 3744, 642, 1471, 4084, 131, 3514, 2443, 531, 1227, 155, 2265, 4024, 2658, 3326, 3910, 1168, 3078, 1530, 3956, 489, 1424, - 3647, 1203, 420, 2924, 3755, 719, 3248, 1376, 3067, 890, 196, 1559, 3269, 270, 2432, 1885, 3212, 1164, 3778, 1752, 579, 1338, 344, 3585, 3017, 288, 3658, 2371, 3882, 1691, 611, 2789, 3809, 1339, 389, 2950, 2015, 59, 3548, 2751, 2158, 4011, 1352, 29, 3388, 2370, 2812, 1946, 954, 2110, 1558, 2947, 3573, 1909, 1326, 679, 1853, 2312, 551, 2702, 33, 2414, 3209, 2824, - 2547, 2143, 3379, 966, 1492, 1979, 2479, 463, 2194, 3657, 2738, 2318, 1261, 3713, 604, 4002, 11, 2192, 2967, 919, 2607, 3369, 2837, 1676, 2539, 984, 1568, 93, 2901, 1318, 3538, 1041, 2216, 1756, 3454, 1030, 4050, 1402, 798, 1723, 311, 3277, 2546, 2886, 2043, 461, 1206, 3677, 361, 3260, 3988, 809, 2605, 470, 3007, 3517, 102, 3221, 1398, 2062, 3611, 1134, 1928, 865, - 4060, 621, 1710, 2606, 3510, 317, 4017, 1682, 3329, 1159, 1940, 654, 3461, 1789, 1015, 2691, 1455, 3599, 374, 1947, 4069, 71, 2126, 763, 3961, 2278, 3161, 1997, 824, 2623, 2080, 244, 3257, 780, 2732, 2308, 545, 3351, 2476, 3806, 1204, 588, 1591, 963, 3610, 1699, 754, 3049, 2651, 1106, 65, 2221, 1644, 3821, 1100, 2463, 1614, 3801, 965, 2965, 715, 3394, 1593, 212, -}; - -#endif /* BLUE_NOISE_64X64_H */ diff --git a/vendor/pixman/pixman/dither/make-blue-noise.c b/vendor/pixman/pixman/dither/make-blue-noise.c deleted file mode 100644 index f9974b4d4..000000000 --- a/vendor/pixman/pixman/dither/make-blue-noise.c +++ /dev/null @@ -1,679 +0,0 @@ -/* Blue noise generation using the void-and-cluster method as described in - * - * The void-and-cluster method for dither array generation - * Ulichney, Robert A (1993) - * - * http://cv.ulichney.com/papers/1993-void-cluster.pdf - * - * Note that running with openmp (-DUSE_OPENMP) will trigger additional - * randomness due to computing reductions in parallel, and is not recommended - * unless generating very large dither arrays. - */ - -#include -#include -#include -#include -#include - -/* Booleans and utility functions */ - -#ifndef TRUE -# define TRUE 1 -#endif - -#ifndef FALSE -# define FALSE 0 -#endif - -typedef int bool_t; - -int -imin (int x, int y) -{ - return x < y ? x : y; -} - -/* Memory allocation */ -void * -malloc_abc (unsigned int a, unsigned int b, unsigned int c) -{ - if (a >= INT32_MAX / b) - return NULL; - else if (a * b >= INT32_MAX / c) - return NULL; - else - return malloc (a * b * c); -} - -/* Random number generation */ -typedef uint32_t xorwow_state_t[5]; - -uint32_t -xorwow_next (xorwow_state_t *state) -{ - uint32_t s = (*state)[0], - t = (*state)[3]; - (*state)[3] = (*state)[2]; - (*state)[2] = (*state)[1]; - (*state)[1] = s; - - t ^= t >> 2; - t ^= t << 1; - t ^= s ^ (s << 4); - - (*state)[0] = t; - (*state)[4] += 362437; - - return t + (*state)[4]; -} - -float -xorwow_float (xorwow_state_t *s) -{ - return (xorwow_next (s) >> 9) / (float)((1 << 23) - 1); -} - -/* Floating point matrices - * - * Used to cache the cluster sizes. - */ -typedef struct matrix_t { - int width; - int height; - float *buffer; -} matrix_t; - -bool_t -matrix_init (matrix_t *matrix, int width, int height) -{ - float *buffer; - - if (!matrix) - return FALSE; - - buffer = malloc_abc (width, height, sizeof (float)); - - if (!buffer) - return FALSE; - - matrix->buffer = buffer; - matrix->width = width; - matrix->height = height; - - return TRUE; -} - -bool_t -matrix_copy (matrix_t *dst, matrix_t const *src) -{ - float *srcbuf = src->buffer, - *srcend = src->buffer + src->width * src->height, - *dstbuf = dst->buffer; - - if (dst->width != src->width || dst->height != src->height) - return FALSE; - - while (srcbuf < srcend) - *dstbuf++ = *srcbuf++; - - return TRUE; -} - -float * -matrix_get (matrix_t *matrix, int x, int y) -{ - return &matrix->buffer[y * matrix->width + x]; -} - -void -matrix_destroy (matrix_t *matrix) -{ - free (matrix->buffer); -} - -/* Binary patterns */ -typedef struct pattern_t { - int width; - int height; - bool_t *buffer; -} pattern_t; - -bool_t -pattern_init (pattern_t *pattern, int width, int height) -{ - bool_t *buffer; - - if (!pattern) - return FALSE; - - buffer = malloc_abc (width, height, sizeof (bool_t)); - - if (!buffer) - return FALSE; - - pattern->buffer = buffer; - pattern->width = width; - pattern->height = height; - - return TRUE; -} - -bool_t -pattern_copy (pattern_t *dst, pattern_t const *src) -{ - bool_t *srcbuf = src->buffer, - *srcend = src->buffer + src->width * src->height, - *dstbuf = dst->buffer; - - if (dst->width != src->width || dst->height != src->height) - return FALSE; - - while (srcbuf < srcend) - *dstbuf++ = *srcbuf++; - - return TRUE; -} - -bool_t * -pattern_get (pattern_t *pattern, int x, int y) -{ - return &pattern->buffer[y * pattern->width + x]; -} - -void -pattern_fill_white_noise (pattern_t *pattern, float fraction, - xorwow_state_t *s) -{ - bool_t *buffer = pattern->buffer; - bool_t *end = buffer + (pattern->width * pattern->height); - - while (buffer < end) - *buffer++ = xorwow_float (s) < fraction; -} - -void -pattern_destroy (pattern_t *pattern) -{ - free (pattern->buffer); -} - -/* Dither arrays */ -typedef struct array_t { - int width; - int height; - uint32_t *buffer; -} array_t; - -bool_t -array_init (array_t *array, int width, int height) -{ - uint32_t *buffer; - - if (!array) - return FALSE; - - buffer = malloc_abc (width, height, sizeof (uint32_t)); - - if (!buffer) - return FALSE; - - array->buffer = buffer; - array->width = width; - array->height = height; - - return TRUE; -} - -uint32_t * -array_get (array_t *array, int x, int y) -{ - return &array->buffer[y * array->width + x]; -} - -bool_t -array_save_ppm (array_t *array, const char *filename) -{ - FILE *f = fopen(filename, "wb"); - - int i = 0; - int bpp = 2; - uint8_t buffer[1024]; - - if (!f) - return FALSE; - - if (array->width * array->height - 1 < 256) - bpp = 1; - - fprintf(f, "P5 %d %d %d\n", array->width, array->height, - array->width * array->height - 1); - while (i < array->width * array->height) - { - int j = 0; - for (; j < 1024 / bpp && j < array->width * array->height; ++j) - { - uint32_t v = array->buffer[i + j]; - if (bpp == 2) - { - buffer[2 * j] = v & 0xff; - buffer[2 * j + 1] = (v & 0xff00) >> 8; - } else { - buffer[j] = v; - } - } - - fwrite((void *)buffer, bpp, j, f); - i += j; - } - - if (fclose(f) != 0) - return FALSE; - - return TRUE; -} - -bool_t -array_save (array_t *array, const char *filename) -{ - int x, y; - FILE *f = fopen(filename, "wb"); - - if (!f) - return FALSE; - - fprintf (f, -"/* WARNING: This file is generated by make-blue-noise.c\n" -" * Please edit that file instead of this one.\n" -" */\n" -"\n" -"#ifndef BLUE_NOISE_%dX%d_H\n" -"#define BLUE_NOISE_%dX%d_H\n" -"\n" -"#include \n" -"\n", array->width, array->height, array->width, array->height); - - fprintf (f, "static const uint16_t dither_blue_noise_%dx%d[%d] = {\n", - array->width, array->height, array->width * array->height); - - for (y = 0; y < array->height; ++y) - { - fprintf (f, " "); - for (x = 0; x < array->width; ++x) - { - if (x != 0) - fprintf (f, ", "); - - fprintf (f, "%d", *array_get (array, x, y)); - } - - fprintf (f, ",\n"); - } - fprintf (f, "};\n"); - - fprintf (f, "\n#endif /* BLUE_NOISE_%dX%d_H */\n", - array->width, array->height); - - if (fclose(f) != 0) - return FALSE; - - return TRUE; -} - -void -array_destroy (array_t *array) -{ - free (array->buffer); -} - -/* Dither array generation */ -bool_t -compute_cluster_sizes (pattern_t *pattern, matrix_t *matrix) -{ - int width = pattern->width, - height = pattern->height; - - if (matrix->width != width || matrix->height != height) - return FALSE; - - int px, py, qx, qy, dx, dy; - float tsqsi = 2.f * 1.5f * 1.5f; - -#ifdef USE_OPENMP -#pragma omp parallel for default (none) \ - private (py, px, qy, qx, dx, dy) \ - shared (height, width, pattern, matrix, tsqsi) -#endif - for (py = 0; py < height; ++py) - { - for (px = 0; px < width; ++px) - { - bool_t pixel = *pattern_get (pattern, px, py); - float dist = 0.f; - - for (qx = 0; qx < width; ++qx) - { - dx = imin (abs (qx - px), width - abs (qx - px)); - dx = dx * dx; - - for (qy = 0; qy < height; ++qy) - { - dy = imin (abs (qy - py), height - abs (qy - py)); - dy = dy * dy; - - dist += (pixel == *pattern_get (pattern, qx, qy)) - * expf (- (dx + dy) / tsqsi); - } - } - - *matrix_get (matrix, px, py) = dist; - } - } - - return TRUE; -} - -bool_t -swap_pixel (pattern_t *pattern, matrix_t *matrix, int x, int y) -{ - int width = pattern->width, - height = pattern->height; - - bool_t new; - - float f, - dist = 0.f, - tsqsi = 2.f * 1.5f * 1.5f; - - int px, py, dx, dy; - bool_t b; - - new = !*pattern_get (pattern, x, y); - *pattern_get (pattern, x, y) = new; - - if (matrix->width != width || matrix->height != height) - return FALSE; - - -#ifdef USE_OPENMP -#pragma omp parallel for reduction (+:dist) default (none) \ - private (px, py, dx, dy, b, f) \ - shared (x, y, width, height, pattern, matrix, new, tsqsi) -#endif - for (py = 0; py < height; ++py) - { - dy = imin (abs (py - y), height - abs (py - y)); - dy = dy * dy; - - for (px = 0; px < width; ++px) - { - dx = imin (abs (px - x), width - abs (px - x)); - dx = dx * dx; - - b = (*pattern_get (pattern, px, py) == new); - f = expf (- (dx + dy) / tsqsi); - *matrix_get (matrix, px, py) += (2 * b - 1) * f; - - dist += b * f; - } - } - - *matrix_get (matrix, x, y) = dist; - return TRUE; -} - -void -largest_cluster (pattern_t *pattern, matrix_t *matrix, - bool_t pixel, int *xmax, int *ymax) -{ - int width = pattern->width, - height = pattern->height; - - int x, y; - - float vmax = -INFINITY; - -#ifdef USE_OPENMP -#pragma omp parallel default (none) \ - private (x, y) \ - shared (height, width, pattern, matrix, pixel, xmax, ymax, vmax) -#endif - { - int xbest = -1, - ybest = -1; - -#ifdef USE_OPENMP - float vbest = -INFINITY; - -#pragma omp for reduction (max: vmax) collapse (2) -#endif - for (y = 0; y < height; ++y) - { - for (x = 0; x < width; ++x) - { - if (*pattern_get (pattern, x, y) != pixel) - continue; - - if (*matrix_get (matrix, x, y) > vmax) - { - vmax = *matrix_get (matrix, x, y); -#ifdef USE_OPENMP - vbest = vmax; -#endif - xbest = x; - ybest = y; - } - } - } - -#ifdef USE_OPENMP -#pragma omp barrier -#pragma omp critical - { - if (vmax == vbest) - { - *xmax = xbest; - *ymax = ybest; - } - } -#else - *xmax = xbest; - *ymax = ybest; -#endif - } - - assert (vmax > -INFINITY); -} - -void -generate_initial_binary_pattern (pattern_t *pattern, matrix_t *matrix) -{ - int xcluster = 0, - ycluster = 0, - xvoid = 0, - yvoid = 0; - - for (;;) - { - largest_cluster (pattern, matrix, TRUE, &xcluster, &ycluster); - assert (*pattern_get (pattern, xcluster, ycluster) == TRUE); - swap_pixel (pattern, matrix, xcluster, ycluster); - - largest_cluster (pattern, matrix, FALSE, &xvoid, &yvoid); - assert (*pattern_get (pattern, xvoid, yvoid) == FALSE); - swap_pixel (pattern, matrix, xvoid, yvoid); - - if (xcluster == xvoid && ycluster == yvoid) - return; - } -} - -bool_t -generate_dither_array (array_t *array, - pattern_t const *prototype, matrix_t const *matrix, - pattern_t *temp_pattern, matrix_t *temp_matrix) -{ - int width = prototype->width, - height = prototype->height; - - int x, y, rank; - - int initial_rank = 0; - - if (array->width != width || array->height != height) - return FALSE; - - // Make copies of the prototype and associated sizes matrix since we will - // trash them - if (!pattern_copy (temp_pattern, prototype)) - return FALSE; - - if (!matrix_copy (temp_matrix, matrix)) - return FALSE; - - // Compute initial rank - for (y = 0; y < height; ++y) - { - for (x = 0; x < width; ++x) - { - if (*pattern_get (temp_pattern, x, y)) - initial_rank += 1; - - *array_get (array, x, y) = 0; - } - } - - // Phase 1 - for (rank = initial_rank; rank > 0; --rank) - { - largest_cluster (temp_pattern, temp_matrix, TRUE, &x, &y); - swap_pixel (temp_pattern, temp_matrix, x, y); - *array_get (array, x, y) = rank - 1; - } - - // Make copies again for phases 2 & 3 - if (!pattern_copy (temp_pattern, prototype)) - return FALSE; - - if (!matrix_copy (temp_matrix, matrix)) - return FALSE; - - // Phase 2 & 3 - for (rank = initial_rank; rank < width * height; ++rank) - { - largest_cluster (temp_pattern, temp_matrix, FALSE, &x, &y); - swap_pixel (temp_pattern, temp_matrix, x, y); - *array_get (array, x, y) = rank; - } - - return TRUE; -} - -bool_t -generate (int size, xorwow_state_t *s, - char const *c_filename, char const *ppm_filename) -{ - bool_t ok = TRUE; - - pattern_t prototype, temp_pattern; - array_t array; - matrix_t matrix, temp_matrix; - - printf ("Generating %dx%d blue noise...\n", size, size); - - if (!pattern_init (&prototype, size, size)) - return FALSE; - - if (!pattern_init (&temp_pattern, size, size)) - { - pattern_destroy (&prototype); - return FALSE; - } - - if (!matrix_init (&matrix, size, size)) - { - pattern_destroy (&temp_pattern); - pattern_destroy (&prototype); - return FALSE; - } - - if (!matrix_init (&temp_matrix, size, size)) - { - matrix_destroy (&matrix); - pattern_destroy (&temp_pattern); - pattern_destroy (&prototype); - return FALSE; - } - - if (!array_init (&array, size, size)) - { - matrix_destroy (&temp_matrix); - matrix_destroy (&matrix); - pattern_destroy (&temp_pattern); - pattern_destroy (&prototype); - return FALSE; - } - - printf("Filling initial binary pattern with white noise...\n"); - pattern_fill_white_noise (&prototype, .1, s); - - printf("Initializing cluster sizes...\n"); - if (!compute_cluster_sizes (&prototype, &matrix)) - { - fprintf (stderr, "Error while computing cluster sizes\n"); - ok = FALSE; - goto out; - } - - printf("Generating initial binary pattern...\n"); - generate_initial_binary_pattern (&prototype, &matrix); - - printf("Generating dither array...\n"); - if (!generate_dither_array (&array, &prototype, &matrix, - &temp_pattern, &temp_matrix)) - { - fprintf (stderr, "Error while generating dither array\n"); - ok = FALSE; - goto out; - } - - printf("Saving dither array...\n"); - if (!array_save (&array, c_filename)) - { - fprintf (stderr, "Error saving dither array\n"); - ok = FALSE; - goto out; - } - -#if SAVE_PPM - if (!array_save_ppm (&array, ppm_filename)) - { - fprintf (stderr, "Error saving dither array PPM\n"); - ok = FALSE; - goto out; - } -#else - (void)ppm_filename; -#endif - - printf("All done!\n"); - -out: - array_destroy (&array); - matrix_destroy (&temp_matrix); - matrix_destroy (&matrix); - pattern_destroy (&temp_pattern); - pattern_destroy (&prototype); - return ok; -} - -int -main (void) -{ - xorwow_state_t s = {1185956906, 12385940, 983948, 349208051, 901842}; - - if (!generate (64, &s, "blue-noise-64x64.h", "blue-noise-64x64.ppm")) - return -1; - - return 0; -} diff --git a/vendor/pixman/pixman/loongson-mmintrin.h b/vendor/pixman/pixman/loongson-mmintrin.h deleted file mode 100644 index 0e79e8648..000000000 --- a/vendor/pixman/pixman/loongson-mmintrin.h +++ /dev/null @@ -1,412 +0,0 @@ -/* The gcc-provided loongson intrinsic functions are way too fucking broken - * to be of any use, otherwise I'd use them. - * - * - The hardware instructions are very similar to MMX or iwMMXt. Certainly - * close enough that they could have implemented the _mm_*-style intrinsic - * interface and had a ton of optimized code available to them. Instead they - * implemented something much, much worse. - * - * - pshuf takes a dead first argument, causing extra instructions to be - * generated. - * - * - There are no 64-bit shift or logical intrinsics, which means you have - * to implement them with inline assembly, but this is a nightmare because - * gcc doesn't understand that the integer vector datatypes are actually in - * floating-point registers, so you end up with braindead code like - * - * punpcklwd $f9,$f9,$f5 - * dmtc1 v0,$f8 - * punpcklwd $f19,$f19,$f5 - * dmfc1 t9,$f9 - * dmtc1 v0,$f9 - * dmtc1 t9,$f20 - * dmfc1 s0,$f19 - * punpcklbh $f20,$f20,$f2 - * - * where crap just gets copied back and forth between integer and floating- - * point registers ad nauseum. - * - * Instead of trying to workaround the problems from these crap intrinsics, I - * just implement the _mm_* intrinsics needed for pixman-mmx.c using inline - * assembly. - */ - -#include - -/* vectors are stored in 64-bit floating-point registers */ -typedef double __m64; -/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */ -typedef float __m32; - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_setzero_si64 (void) -{ - return 0.0; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_add_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("paddh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_add_pi32 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("paddw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_adds_pu16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("paddush %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_adds_pu8 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("paddusb %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_and_si64 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("and %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("pcmpeqw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_empty (void) -{ - -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_madd_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("pmaddhw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("pmulhuh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mullo_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("pmullh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_or_si64 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("or %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_packs_pu16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("packushb %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_packs_pi32 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("packsswh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ - (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0) -{ - if (__builtin_constant_p (__w3) && - __builtin_constant_p (__w2) && - __builtin_constant_p (__w1) && - __builtin_constant_p (__w0)) - { - uint64_t val = ((uint64_t)__w3 << 48) - | ((uint64_t)__w2 << 32) - | ((uint64_t)__w1 << 16) - | ((uint64_t)__w0 << 0); - return *(__m64 *)&val; - } - else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0) - { - /* TODO: handle other cases */ - uint64_t val = __w3; - uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0); - __m64 ret; - asm("pshufh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm) - ); - return ret; - } else { - uint64_t val = ((uint64_t)__w3 << 48) - | ((uint64_t)__w2 << 32) - | ((uint64_t)__w1 << 16) - | ((uint64_t)__w0 << 0); - return *(__m64 *)&val; - } -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_set_pi32 (unsigned __i1, unsigned __i0) -{ - if (__builtin_constant_p (__i1) && - __builtin_constant_p (__i0)) - { - uint64_t val = ((uint64_t)__i1 << 32) - | ((uint64_t)__i0 << 0); - return *(__m64 *)&val; - } - else if (__i1 == __i0) - { - uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0); - __m64 ret; - asm("pshufh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm) - ); - return ret; - } else { - uint64_t val = ((uint64_t)__i1 << 32) - | ((uint64_t)__i0 << 0); - return *(__m64 *)&val; - } -} -#undef _MM_SHUFFLE - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_pi16 (__m64 __m, int64_t __n) -{ - __m64 ret; - asm("pshufh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__n) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_slli_pi16 (__m64 __m, int64_t __count) -{ - __m64 ret; - asm("psllh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__count) - ); - return ret; -} -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_slli_si64 (__m64 __m, int64_t __count) -{ - __m64 ret; - asm("dsll %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__count) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_srli_pi16 (__m64 __m, int64_t __count) -{ - __m64 ret; - asm("psrlh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__count) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_srli_pi32 (__m64 __m, int64_t __count) -{ - __m64 ret; - asm("psrlw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__count) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_srli_si64 (__m64 __m, int64_t __count) -{ - __m64 ret; - asm("dsrl %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__count) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_sub_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("psubh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("punpckhbh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("punpckhhw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("punpcklbh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32 datatype which - * allows load8888 to use 32-bit loads */ -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2) -{ - __m64 ret; - asm("punpcklbh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("punpcklhw %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_xor_si64 (__m64 __m1, __m64 __m2) -{ - __m64 ret; - asm("xor %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -loongson_extract_pi16 (__m64 __m, int64_t __pos) -{ - __m64 ret; - asm("pextrh %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m), "f" (*(__m64 *)&__pos) - ); - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos) -{ - __m64 ret; - asm("pinsrh_%3 %0, %1, %2\n\t" - : "=f" (ret) - : "f" (__m1), "f" (__m2), "i" (__pos) - ); - return ret; -} diff --git a/vendor/pixman/pixman/make-srgb.pl b/vendor/pixman/pixman/make-srgb.pl deleted file mode 100644 index 8bba160cc..000000000 --- a/vendor/pixman/pixman/make-srgb.pl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/perl -w - -use strict; - -sub linear_to_srgb -{ - my ($c) = @_; - - if ($c < 0.0031308) - { - return $c * 12.92; - } - else - { - return 1.055 * $c ** (1.0/2.4) - 0.055; - } -} - -sub srgb_to_linear -{ - my ($c) = @_; - - if ($c < 0.04045) - { - return $c / 12.92; - } - else - { - return (($c + 0.055) / 1.055) ** 2.4 - } -} - -my @linear_to_srgb; -for my $linear (0 .. 4095) -{ - my $srgb = int(linear_to_srgb($linear / 4095.0) * 255.0 + 0.5); - push @linear_to_srgb, $srgb; -} - -my @srgb_to_linear; -for my $srgb (0 .. 255) -{ - my $linear = int(srgb_to_linear($srgb / 255.0) * 65535.0 + 0.5); - push @srgb_to_linear, $linear; -} - -# Ensure that we have a lossless sRGB and back conversion loop. -# some of the darkest shades need a little bias -- maximum is just -# 5 increments out of 16. This gives us useful property with -# least amount of error in the sRGB-to-linear table, and keeps the actual -# table lookup in the other direction as simple as possible. -for my $srgb (0 .. $#srgb_to_linear) -{ - my $add = 0; - while (1) - { - my $linear = $srgb_to_linear[$srgb]; - my $srgb_lossy = $linear_to_srgb[$linear >> 4]; - last if $srgb == $srgb_lossy; - - # Add slight bias to this component until it rounds correctly - $srgb_to_linear[$srgb] ++; - $add ++; - } - die "Too many adds at $srgb" if $add > 5; -} - -print <<"PROLOG"; -/* WARNING: This file is generated by $0. - * Please edit that file instead of this one. - */ - -#include - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -PROLOG - -print "const uint8_t linear_to_srgb[" . @linear_to_srgb . "] =\n"; -print "{\n"; -for my $linear (0 .. $#linear_to_srgb) -{ - if (($linear % 10) == 0) - { - print "\t"; - } - print sprintf("%d, ", $linear_to_srgb[$linear]); - if (($linear % 10) == 9) - { - print "\n"; - } -} -print "\n};\n"; -print "\n"; - -print "const uint16_t srgb_to_linear[" . @srgb_to_linear . "] =\n"; -print "{\n"; -for my $srgb (0 .. $#srgb_to_linear) -{ - if (($srgb % 10) == 0) - { - print "\t"; - } - print sprintf("%d, ", $srgb_to_linear[$srgb]); - if (($srgb % 10) == 9) - { - print "\n"; - } -} -print "\n};\n"; - diff --git a/vendor/pixman/pixman/meson.build b/vendor/pixman/pixman/meson.build deleted file mode 100644 index 62ec66bec..000000000 --- a/vendor/pixman/pixman/meson.build +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright © 2018 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -config_h = configure_file( - configuration : config, - output : 'pixman-config.h' -) - -version_h = configure_file( - configuration : version_conf, - input : 'pixman-version.h.in', - output : 'pixman-version.h', - install_dir : join_paths(get_option('prefix'), get_option('includedir'), 'pixman-1') -) - -libpixman_extra_cargs = [] -default_library = get_option('default_library') -if default_library != 'static' and cc.has_function_attribute('dllexport') - libpixman_extra_cargs = ['-DPIXMAN_API=__declspec(dllexport)'] -endif - -pixman_simd_libs = [] -simds = [ - # the mmx library can be compiled with mmx on x86/x86_64, iwmmxt on - # some arm cores, or loongson mmi on loongson mips systems. The - # libraries will all have the same name, "pixman-mmx", but there is - # no chance of more than one version being built in the same build - # because no system could have mmx, iwmmxt, and mmi, and it - # simplifies the build logic to give them the same name. - ['mmx', have_mmx, mmx_flags, []], - ['mmx', have_loongson_mmi, loongson_mmi_flags, []], - ['mmx', have_iwmmxt, iwmmxt_flags, []], - - ['sse2', have_sse2, sse2_flags, []], - ['ssse3', have_ssse3, ssse3_flags, []], - ['vmx', have_vmx, vmx_flags, []], - ['arm-simd', have_armv6_simd, [], - ['pixman-arm-simd-asm.S', 'pixman-arm-simd-asm-scaled.S']], - ['arm-neon', have_neon, [], - ['pixman-arm-neon-asm.S', 'pixman-arm-neon-asm-bilinear.S']], - ['arm-neon', have_a64neon, [], - ['pixman-arma64-neon-asm.S', 'pixman-arma64-neon-asm-bilinear.S']], - ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags, - ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']], -] - -foreach simd : simds - if simd[1] - name = 'pixman-' + simd[0] - pixman_simd_libs += static_library( - name, - [name + '.c', config_h, version_h, simd[3]], - c_args : simd[2] - ) - endif -endforeach - -pixman_files = files( - 'pixman.c', - 'pixman-access.c', - 'pixman-access-accessors.c', - 'pixman-bits-image.c', - 'pixman-combine32.c', - 'pixman-combine-float.c', - 'pixman-conical-gradient.c', - 'pixman-filter.c', - 'pixman-x86.c', - 'pixman-mips.c', - 'pixman-arm.c', - 'pixman-ppc.c', - 'pixman-edge.c', - 'pixman-edge-accessors.c', - 'pixman-fast-path.c', - 'pixman-glyph.c', - 'pixman-general.c', - 'pixman-gradient-walker.c', - 'pixman-image.c', - 'pixman-implementation.c', - 'pixman-linear-gradient.c', - 'pixman-matrix.c', - 'pixman-noop.c', - 'pixman-radial-gradient.c', - 'pixman-region16.c', - 'pixman-region32.c', - 'pixman-solid-fill.c', - 'pixman-timer.c', - 'pixman-trap.c', - 'pixman-utils.c', -) - -# Android cpu-features -cpu_features_path = get_option('cpu-features-path') -cpu_features_sources = [] -cpu_features_inc = [] -if cpu_features_path != '' - message('Using cpu-features.[ch] from ' + cpu_features_path) - cpu_features_sources = files( - cpu_features_path / 'cpu-features.h', - cpu_features_path / 'cpu-features.c', - ) - cpu_features_inc = include_directories(cpu_features_path) -endif - -libpixman = library( - 'pixman-1', - [pixman_files, config_h, version_h, cpu_features_sources], - link_with: pixman_simd_libs, - c_args : libpixman_extra_cargs, - dependencies : [dep_m, dep_threads], - include_directories : cpu_features_inc, - version : meson.project_version(), - install : true, -) - -inc_pixman = include_directories('.') - -idep_pixman = declare_dependency( - link_with: libpixman, - include_directories : inc_pixman, -) - -if meson.version().version_compare('>= 0.54.0') - meson.override_dependency('pixman-1', idep_pixman) -endif - -install_headers('pixman.h', subdir : 'pixman-1') diff --git a/vendor/pixman/pixman/pixman-access-accessors.c b/vendor/pixman/pixman/pixman-access-accessors.c deleted file mode 100644 index 3263582f1..000000000 --- a/vendor/pixman/pixman/pixman-access-accessors.c +++ /dev/null @@ -1,3 +0,0 @@ -#define PIXMAN_FB_ACCESSORS - -#include "pixman-access.c" diff --git a/vendor/pixman/pixman/pixman-access.c b/vendor/pixman/pixman/pixman-access.c deleted file mode 100644 index 892e70b73..000000000 --- a/vendor/pixman/pixman/pixman-access.c +++ /dev/null @@ -1,1715 +0,0 @@ -/* - * - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * 2008 Aaron Plattner, NVIDIA Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include - -#include "pixman-accessor.h" -#include "pixman-private.h" - -#define CONVERT_RGB24_TO_Y15(s) \ - (((((s) >> 16) & 0xff) * 153 + \ - (((s) >> 8) & 0xff) * 301 + \ - (((s) ) & 0xff) * 58) >> 2) - -#define CONVERT_RGB24_TO_RGB15(s) \ - ((((s) >> 3) & 0x001f) | \ - (((s) >> 6) & 0x03e0) | \ - (((s) >> 9) & 0x7c00)) - -/* Fetch macros */ - -#ifdef WORDS_BIGENDIAN -#define FETCH_1(img,l,o) \ - (((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1) -#else -#define FETCH_1(img,l,o) \ - ((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1) -#endif - -#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) - -#ifdef WORDS_BIGENDIAN -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) -#else -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) -#endif - -#ifdef WORDS_BIGENDIAN -#define FETCH_24(img,l,o) \ - ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ - (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ - (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) -#else -#define FETCH_24(img,l,o) \ - ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ - (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ - (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) -#endif - -/* Store macros */ - -#ifdef WORDS_BIGENDIAN -#define STORE_1(img,l,o,v) \ - do \ - { \ - uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ - uint32_t __m, __v; \ - \ - __m = 1U << (0x1f - ((o) & 0x1f)); \ - __v = (v)? __m : 0; \ - \ - WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ - } \ - while (0) -#else -#define STORE_1(img,l,o,v) \ - do \ - { \ - uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ - uint32_t __m, __v; \ - \ - __m = 1U << ((o) & 0x1f); \ - __v = (v)? __m : 0; \ - \ - WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ - } \ - while (0) -#endif - -#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) - -#ifdef WORDS_BIGENDIAN -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ - } while (0) -#else -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? \ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ - } while (0) -#endif - -#ifdef WORDS_BIGENDIAN -#define STORE_24(img,l,o,v) \ - do \ - { \ - uint8_t *__tmp = (l) + 3 * (o); \ - \ - WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ - WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ - WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ - } \ - while (0) -#else -#define STORE_24(img,l,o,v) \ - do \ - { \ - uint8_t *__tmp = (l) + 3 * (o); \ - \ - WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ - WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ - WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ - } \ - while (0) -#endif - -/* - * YV12 setup and access macros - */ - -#define YV12_SETUP(image) \ - bits_image_t *__bits_image = (bits_image_t *)image; \ - uint32_t *bits = __bits_image->bits; \ - int stride = __bits_image->rowstride; \ - int offset0 = stride < 0 ? \ - ((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride : \ - stride * __bits_image->height; \ - int offset1 = stride < 0 ? \ - offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) : \ - offset0 + (offset0 >> 2) - -/* Note no trailing semicolon on the above macro; if it's there, then - * the typical usage of YV12_SETUP(image); will have an extra trailing ; - * that some compilers will interpret as a statement -- and then any further - * variable declarations will cause an error. - */ - -#define YV12_Y(line) \ - ((uint8_t *) ((bits) + (stride) * (line))) - -#define YV12_U(line) \ - ((uint8_t *) ((bits) + offset1 + \ - ((stride) >> 1) * ((line) >> 1))) - -#define YV12_V(line) \ - ((uint8_t *) ((bits) + offset0 + \ - ((stride) >> 1) * ((line) >> 1))) - -/* Misc. helpers */ - -static force_inline void -get_shifts (pixman_format_code_t format, - int *a, - int *r, - int *g, - int *b) -{ - switch (PIXMAN_FORMAT_TYPE (format)) - { - case PIXMAN_TYPE_A: - *b = 0; - *g = 0; - *r = 0; - *a = 0; - break; - - case PIXMAN_TYPE_ARGB: - case PIXMAN_TYPE_ARGB_SRGB: - *b = 0; - *g = *b + PIXMAN_FORMAT_B (format); - *r = *g + PIXMAN_FORMAT_G (format); - *a = *r + PIXMAN_FORMAT_R (format); - break; - - case PIXMAN_TYPE_ABGR: - *r = 0; - *g = *r + PIXMAN_FORMAT_R (format); - *b = *g + PIXMAN_FORMAT_G (format); - *a = *b + PIXMAN_FORMAT_B (format); - break; - - case PIXMAN_TYPE_BGRA: - /* With BGRA formats we start counting at the high end of the pixel */ - *b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format); - *g = *b - PIXMAN_FORMAT_B (format); - *r = *g - PIXMAN_FORMAT_G (format); - *a = *r - PIXMAN_FORMAT_R (format); - break; - - case PIXMAN_TYPE_RGBA: - /* With BGRA formats we start counting at the high end of the pixel */ - *r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format); - *g = *r - PIXMAN_FORMAT_R (format); - *b = *g - PIXMAN_FORMAT_G (format); - *a = *b - PIXMAN_FORMAT_B (format); - break; - - default: - assert (0); - break; - } -} - -static force_inline uint32_t -convert_channel (uint32_t pixel, uint32_t def_value, - int n_from_bits, int from_shift, - int n_to_bits, int to_shift) -{ - uint32_t v; - - if (n_from_bits && n_to_bits) - v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits); - else if (n_to_bits) - v = def_value; - else - v = 0; - - return (v & ((1 << n_to_bits) - 1)) << to_shift; -} - -static force_inline uint32_t -convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel) -{ - int a_from_shift, r_from_shift, g_from_shift, b_from_shift; - int a_to_shift, r_to_shift, g_to_shift, b_to_shift; - uint32_t a, r, g, b; - - get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift); - get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift); - - a = convert_channel (pixel, ~0, - PIXMAN_FORMAT_A (from), a_from_shift, - PIXMAN_FORMAT_A (to), a_to_shift); - - r = convert_channel (pixel, 0, - PIXMAN_FORMAT_R (from), r_from_shift, - PIXMAN_FORMAT_R (to), r_to_shift); - - g = convert_channel (pixel, 0, - PIXMAN_FORMAT_G (from), g_from_shift, - PIXMAN_FORMAT_G (to), g_to_shift); - - b = convert_channel (pixel, 0, - PIXMAN_FORMAT_B (from), b_from_shift, - PIXMAN_FORMAT_B (to), b_to_shift); - - return a | r | g | b; -} - -static force_inline uint32_t -convert_pixel_to_a8r8g8b8 (bits_image_t *image, - pixman_format_code_t format, - uint32_t pixel) -{ - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || - PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) - { - return image->indexed->rgba[pixel]; - } - else - { - return convert_pixel (format, PIXMAN_a8r8g8b8, pixel); - } -} - -static force_inline uint32_t -convert_pixel_from_a8r8g8b8 (pixman_image_t *image, - pixman_format_code_t format, uint32_t pixel) -{ - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) - { - pixel = CONVERT_RGB24_TO_Y15 (pixel); - - return image->bits.indexed->ent[pixel & 0x7fff]; - } - else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) - { - pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel); - - return image->bits.indexed->ent[pixel & 0x7fff]; - } - else - { - return convert_pixel (PIXMAN_a8r8g8b8, format, pixel); - } -} - -static force_inline uint32_t -fetch_and_convert_pixel (bits_image_t * image, - const uint8_t * bits, - int offset, - pixman_format_code_t format) -{ - uint32_t pixel; - - switch (PIXMAN_FORMAT_BPP (format)) - { - case 1: - pixel = FETCH_1 (image, bits, offset); - break; - - case 4: - pixel = FETCH_4 (image, bits, offset); - break; - - case 8: - pixel = READ (image, bits + offset); - break; - - case 16: - pixel = READ (image, ((uint16_t *)bits + offset)); - break; - - case 24: - pixel = FETCH_24 (image, bits, offset); - break; - - case 32: - pixel = READ (image, ((uint32_t *)bits + offset)); - break; - - default: - pixel = 0xffff00ff; /* As ugly as possible to detect the bug */ - break; - } - - return convert_pixel_to_a8r8g8b8 (image, format, pixel); -} - -static force_inline void -convert_and_store_pixel (bits_image_t * image, - uint8_t * dest, - int offset, - pixman_format_code_t format, - uint32_t pixel) -{ - uint32_t converted = convert_pixel_from_a8r8g8b8 ( - (pixman_image_t *)image, format, pixel); - - switch (PIXMAN_FORMAT_BPP (format)) - { - case 1: - STORE_1 (image, dest, offset, converted & 0x01); - break; - - case 4: - STORE_4 (image, dest, offset, converted & 0xf); - break; - - case 8: - WRITE (image, (dest + offset), converted & 0xff); - break; - - case 16: - WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff); - break; - - case 24: - STORE_24 (image, dest, offset, converted); - break; - - case 32: - WRITE (image, ((uint32_t *)dest + offset), converted); - break; - - default: - *dest = 0x0; - break; - } -} - -#define MAKE_ACCESSORS(format) \ - static void \ - fetch_scanline_ ## format (bits_image_t *image, \ - int x, \ - int y, \ - int width, \ - uint32_t * buffer, \ - const uint32_t *mask) \ - { \ - uint8_t *bits = \ - (uint8_t *)(image->bits + y * image->rowstride); \ - int i; \ - \ - for (i = 0; i < width; ++i) \ - { \ - *buffer++ = \ - fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \ - } \ - } \ - \ - static void \ - store_scanline_ ## format (bits_image_t * image, \ - int x, \ - int y, \ - int width, \ - const uint32_t *values) \ - { \ - uint8_t *dest = \ - (uint8_t *)(image->bits + y * image->rowstride); \ - int i; \ - \ - for (i = 0; i < width; ++i) \ - { \ - convert_and_store_pixel ( \ - image, dest, i + x, PIXMAN_ ## format, values[i]); \ - } \ - } \ - \ - static uint32_t \ - fetch_pixel_ ## format (bits_image_t *image, \ - int offset, \ - int line) \ - { \ - uint8_t *bits = \ - (uint8_t *)(image->bits + line * image->rowstride); \ - \ - return fetch_and_convert_pixel ( \ - image, bits, offset, PIXMAN_ ## format); \ - } \ - \ - static const void *const __dummy__ ## format - -MAKE_ACCESSORS(a8r8g8b8); -MAKE_ACCESSORS(x8r8g8b8); -MAKE_ACCESSORS(a8b8g8r8); -MAKE_ACCESSORS(x8b8g8r8); -MAKE_ACCESSORS(x14r6g6b6); -MAKE_ACCESSORS(b8g8r8a8); -MAKE_ACCESSORS(b8g8r8x8); -MAKE_ACCESSORS(r8g8b8x8); -MAKE_ACCESSORS(r8g8b8a8); -MAKE_ACCESSORS(r8g8b8); -MAKE_ACCESSORS(b8g8r8); -MAKE_ACCESSORS(r5g6b5); -MAKE_ACCESSORS(b5g6r5); -MAKE_ACCESSORS(a1r5g5b5); -MAKE_ACCESSORS(x1r5g5b5); -MAKE_ACCESSORS(a1b5g5r5); -MAKE_ACCESSORS(x1b5g5r5); -MAKE_ACCESSORS(a4r4g4b4); -MAKE_ACCESSORS(x4r4g4b4); -MAKE_ACCESSORS(a4b4g4r4); -MAKE_ACCESSORS(x4b4g4r4); -MAKE_ACCESSORS(a8); -MAKE_ACCESSORS(c8); -MAKE_ACCESSORS(g8); -MAKE_ACCESSORS(r3g3b2); -MAKE_ACCESSORS(b2g3r3); -MAKE_ACCESSORS(a2r2g2b2); -MAKE_ACCESSORS(a2b2g2r2); -MAKE_ACCESSORS(x4a4); -MAKE_ACCESSORS(a4); -MAKE_ACCESSORS(g4); -MAKE_ACCESSORS(c4); -MAKE_ACCESSORS(r1g2b1); -MAKE_ACCESSORS(b1g2r1); -MAKE_ACCESSORS(a1r1g1b1); -MAKE_ACCESSORS(a1b1g1r1); -MAKE_ACCESSORS(a1); -MAKE_ACCESSORS(g1); - -/********************************** Fetch ************************************/ -/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded - * floating point numbers. We assume that single precision - * floating point follows the IEEE 754 format. - */ -static const uint32_t to_linear_u[256] = -{ - 0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61, - 0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a, - 0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d, - 0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152, - 0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43, - 0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e, - 0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601, - 0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9, - 0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae, - 0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380, - 0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466, - 0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65, - 0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48, - 0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728, - 0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4, - 0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16, - 0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f, - 0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50, - 0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a, - 0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702, - 0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027, - 0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf, - 0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0, - 0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281, - 0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0, - 0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a, - 0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15, - 0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14, - 0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508, - 0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64, - 0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594, - 0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8, - 0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65, - 0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237, - 0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c, - 0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680, - 0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24, - 0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e, - 0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8, - 0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de, - 0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6, - 0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae, - 0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000 -}; - -static const float * const to_linear = (const float *)to_linear_u; - -static uint8_t -to_srgb (float f) -{ - uint8_t low = 0; - uint8_t high = 255; - - while (high - low > 1) - { - uint8_t mid = (low + high) / 2; - - if (to_linear[mid] > f) - high = mid; - else - low = mid; - } - - if (to_linear[high] - f < f - to_linear[low]) - return high; - else - return low; -} - -static void -fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = bits + x; - const uint32_t *end = pixel + width; - argb_t *buffer = (argb_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - argb_t *argb = buffer; - - argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); - - argb->r = to_linear [(p >> 16) & 0xff]; - argb->g = to_linear [(p >> 8) & 0xff]; - argb->b = to_linear [(p >> 0) & 0xff]; - - buffer++; - } -} - -static void -fetch_scanline_r8g8b8_sRGB_float (bits_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); - argb_t *buffer = (argb_t *)b; - int i; - for (i = x; i < width; ++i) - { - uint32_t p = FETCH_24 (image, bits, i); - argb_t *argb = buffer; - - argb->a = 1.0f; - - argb->r = to_linear[(p >> 16) & 0xff]; - argb->g = to_linear[(p >> 8) & 0xff]; - argb->b = to_linear[(p >> 0) & 0xff]; - - buffer++; - } -} - -/* Expects a float buffer */ -static void -fetch_scanline_a2r10g10b10_float (bits_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = bits + x; - const uint32_t *end = pixel + width; - argb_t *buffer = (argb_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t a = p >> 30; - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - - buffer->a = pixman_unorm_to_float (a, 2); - buffer->r = pixman_unorm_to_float (r, 10); - buffer->g = pixman_unorm_to_float (g, 10); - buffer->b = pixman_unorm_to_float (b, 10); - - buffer++; - } -} - -/* Expects a float buffer */ -#ifndef PIXMAN_FB_ACCESSORS -static void -fetch_scanline_rgbf_float (bits_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const float *bits = (float *)image->bits + y * image->rowstride; - const float *pixel = bits + x * 3; - argb_t *buffer = (argb_t *)b; - - for (; width--; buffer++) { - buffer->r = *pixel++; - buffer->g = *pixel++; - buffer->b = *pixel++; - buffer->a = 1.f; - } -} - -static void -fetch_scanline_rgbaf_float (bits_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const float *bits = (float *)image->bits + y * image->rowstride; - const float *pixel = bits + x * 4; - argb_t *buffer = (argb_t *)b; - - for (; width--; buffer++) { - buffer->r = *pixel++; - buffer->g = *pixel++; - buffer->b = *pixel++; - buffer->a = *pixel++; - } -} -#endif - -static void -fetch_scanline_x2r10g10b10_float (bits_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - argb_t *buffer = (argb_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - - buffer->a = 1.0; - buffer->r = pixman_unorm_to_float (r, 10); - buffer->g = pixman_unorm_to_float (g, 10); - buffer->b = pixman_unorm_to_float (b, 10); - - buffer++; - } -} - -/* Expects a float buffer */ -static void -fetch_scanline_a2b10g10r10_float (bits_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = bits + x; - const uint32_t *end = pixel + width; - argb_t *buffer = (argb_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t a = p >> 30; - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - - buffer->a = pixman_unorm_to_float (a, 2); - buffer->r = pixman_unorm_to_float (r, 10); - buffer->g = pixman_unorm_to_float (g, 10); - buffer->b = pixman_unorm_to_float (b, 10); - - buffer++; - } -} - -/* Expects a float buffer */ -static void -fetch_scanline_x2b10g10r10_float (bits_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - argb_t *buffer = (argb_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - - buffer->a = 1.0; - buffer->r = pixman_unorm_to_float (r, 10); - buffer->g = pixman_unorm_to_float (g, 10); - buffer->b = pixman_unorm_to_float (b, 10); - - buffer++; - } -} - -static void -fetch_scanline_yuy2 (bits_image_t *image, - int x, - int line, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + image->rowstride * line; - int i; - - for (i = 0; i < width; i++) - { - int16_t y, u, v; - int32_t r, g, b; - - y = ((uint8_t *) bits)[(x + i) << 1] - 16; - u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128; - v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128; - - /* R = 1.164(Y - 16) + 1.596(V - 128) */ - r = 0x012b27 * y + 0x019a2e * v; - /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ - g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; - /* B = 1.164(Y - 16) + 2.018(U - 128) */ - b = 0x012b27 * y + 0x0206a2 * u; - - *buffer++ = 0xff000000 | - (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | - (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); - } -} - -static void -fetch_scanline_yv12 (bits_image_t *image, - int x, - int line, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - YV12_SETUP (image); - uint8_t *y_line = YV12_Y (line); - uint8_t *u_line = YV12_U (line); - uint8_t *v_line = YV12_V (line); - int i; - - for (i = 0; i < width; i++) - { - int16_t y, u, v; - int32_t r, g, b; - - y = y_line[x + i] - 16; - u = u_line[(x + i) >> 1] - 128; - v = v_line[(x + i) >> 1] - 128; - - /* R = 1.164(Y - 16) + 1.596(V - 128) */ - r = 0x012b27 * y + 0x019a2e * v; - /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ - g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; - /* B = 1.164(Y - 16) + 2.018(U - 128) */ - b = 0x012b27 * y + 0x0206a2 * u; - - *buffer++ = 0xff000000 | - (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | - (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); - } -} - -/**************************** Pixel wise fetching *****************************/ - -#ifndef PIXMAN_FB_ACCESSORS -static argb_t -fetch_pixel_rgbf_float (bits_image_t *image, - int offset, - int line) -{ - float *bits = (float *)image->bits + line * image->rowstride; - argb_t argb; - - argb.r = bits[offset * 3]; - argb.g = bits[offset * 3 + 1]; - argb.b = bits[offset * 3 + 2]; - argb.a = 1.f; - - return argb; -} - -static argb_t -fetch_pixel_rgbaf_float (bits_image_t *image, - int offset, - int line) -{ - float *bits = (float *)image->bits + line * image->rowstride; - argb_t argb; - - argb.r = bits[offset * 4]; - argb.g = bits[offset * 4 + 1]; - argb.b = bits[offset * 4 + 2]; - argb.a = bits[offset * 4 + 3]; - - return argb; -} -#endif - -static argb_t -fetch_pixel_x2r10g10b10_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - argb_t argb; - - argb.a = 1.0; - argb.r = pixman_unorm_to_float (r, 10); - argb.g = pixman_unorm_to_float (g, 10); - argb.b = pixman_unorm_to_float (b, 10); - - return argb; -} - -static argb_t -fetch_pixel_a2r10g10b10_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - uint64_t a = p >> 30; - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - argb_t argb; - - argb.a = pixman_unorm_to_float (a, 2); - argb.r = pixman_unorm_to_float (r, 10); - argb.g = pixman_unorm_to_float (g, 10); - argb.b = pixman_unorm_to_float (b, 10); - - return argb; -} - -static argb_t -fetch_pixel_a2b10g10r10_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - uint64_t a = p >> 30; - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - argb_t argb; - - argb.a = pixman_unorm_to_float (a, 2); - argb.r = pixman_unorm_to_float (r, 10); - argb.g = pixman_unorm_to_float (g, 10); - argb.b = pixman_unorm_to_float (b, 10); - - return argb; -} - -static argb_t -fetch_pixel_x2b10g10r10_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - argb_t argb; - - argb.a = 1.0; - argb.r = pixman_unorm_to_float (r, 10); - argb.g = pixman_unorm_to_float (g, 10); - argb.b = pixman_unorm_to_float (b, 10); - - return argb; -} - -static argb_t -fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - argb_t argb; - - argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); - - argb.r = to_linear [(p >> 16) & 0xff]; - argb.g = to_linear [(p >> 8) & 0xff]; - argb.b = to_linear [(p >> 0) & 0xff]; - - return argb; -} - -static argb_t -fetch_pixel_r8g8b8_sRGB_float (bits_image_t *image, - int offset, - int line) -{ - uint8_t *bits = (uint8_t *)(image->bits + line * image->rowstride); - uint32_t p = FETCH_24 (image, bits, offset); - argb_t argb; - - argb.a = 1.0f; - - argb.r = to_linear[(p >> 16) & 0xff]; - argb.g = to_linear[(p >> 8) & 0xff]; - argb.b = to_linear[(p >> 0) & 0xff]; - - return argb; -} - -static uint32_t -fetch_pixel_yuy2 (bits_image_t *image, - int offset, - int line) -{ - const uint32_t *bits = image->bits + image->rowstride * line; - - int16_t y, u, v; - int32_t r, g, b; - - y = ((uint8_t *) bits)[offset << 1] - 16; - u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128; - v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128; - - /* R = 1.164(Y - 16) + 1.596(V - 128) */ - r = 0x012b27 * y + 0x019a2e * v; - - /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ - g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; - - /* B = 1.164(Y - 16) + 2.018(U - 128) */ - b = 0x012b27 * y + 0x0206a2 * u; - - return 0xff000000 | - (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | - (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); -} - -static uint32_t -fetch_pixel_yv12 (bits_image_t *image, - int offset, - int line) -{ - YV12_SETUP (image); - int16_t y = YV12_Y (line)[offset] - 16; - int16_t u = YV12_U (line)[offset >> 1] - 128; - int16_t v = YV12_V (line)[offset >> 1] - 128; - int32_t r, g, b; - - /* R = 1.164(Y - 16) + 1.596(V - 128) */ - r = 0x012b27 * y + 0x019a2e * v; - - /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ - g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; - - /* B = 1.164(Y - 16) + 2.018(U - 128) */ - b = 0x012b27 * y + 0x0206a2 * u; - - return 0xff000000 | - (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | - (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); -} - -/*********************************** Store ************************************/ - -#ifndef PIXMAN_FB_ACCESSORS -static void -store_scanline_rgbaf_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - float *bits = (float *)image->bits + image->rowstride * y + 4 * x; - const argb_t *values = (argb_t *)v; - - for (; width; width--, values++) - { - *bits++ = values->r; - *bits++ = values->g; - *bits++ = values->b; - *bits++ = values->a; - } -} - -static void -store_scanline_rgbf_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - float *bits = (float *)image->bits + image->rowstride * y + 3 * x; - const argb_t *values = (argb_t *)v; - - for (; width; width--, values++) - { - *bits++ = values->r; - *bits++ = values->g; - *bits++ = values->b; - } -} -#endif - -static void -store_scanline_a2r10g10b10_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = bits + x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - a = pixman_float_to_unorm (values[i].a, 2); - r = pixman_float_to_unorm (values[i].r, 10); - g = pixman_float_to_unorm (values[i].g, 10); - b = pixman_float_to_unorm (values[i].b, 10); - - WRITE (image, pixel++, - (a << 30) | (r << 20) | (g << 10) | b); - } -} - -static void -store_scanline_x2r10g10b10_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = bits + x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t r, g, b; - - r = pixman_float_to_unorm (values[i].r, 10); - g = pixman_float_to_unorm (values[i].g, 10); - b = pixman_float_to_unorm (values[i].b, 10); - - WRITE (image, pixel++, - (r << 20) | (g << 10) | b); - } -} - -static void -store_scanline_a2b10g10r10_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = bits + x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - a = pixman_float_to_unorm (values[i].a, 2); - r = pixman_float_to_unorm (values[i].r, 10); - g = pixman_float_to_unorm (values[i].g, 10); - b = pixman_float_to_unorm (values[i].b, 10); - - WRITE (image, pixel++, - (a << 30) | (b << 20) | (g << 10) | r); - } -} - -static void -store_scanline_x2b10g10r10_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = bits + x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t r, g, b; - - r = pixman_float_to_unorm (values[i].r, 10); - g = pixman_float_to_unorm (values[i].g, 10); - b = pixman_float_to_unorm (values[i].b, 10); - - WRITE (image, pixel++, - (b << 20) | (g << 10) | r); - } -} - -static void -store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = bits + x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - a = pixman_float_to_unorm (values[i].a, 8); - r = to_srgb (values[i].r); - g = to_srgb (values[i].g); - b = to_srgb (values[i].b); - - WRITE (image, pixel++, - (a << 24) | (r << 16) | (g << 8) | b); - } -} - -static void -store_scanline_r8g8b8_sRGB_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint8_t *bits = (uint8_t *)(image->bits + image->rowstride * y) + 3 * x; - argb_t *values = (argb_t *)v; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t r, g, b, rgb; - - r = to_srgb (values[i].r); - g = to_srgb (values[i].g); - b = to_srgb (values[i].b); - - rgb = (r << 16) | (g << 8) | b; - - STORE_24 (image, bits, i, rgb); - } -} - -/* - * Contracts a floating point image to 32bpp and then stores it using a - * regular 32-bit store proc. Despite the type, this function expects an - * argb_t buffer. - */ -static void -store_scanline_generic_float (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *argb8_pixels; - - assert (image->common.type == BITS); - - argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t)); - if (!argb8_pixels) - return; - - /* Contract the scanline. We could do this in place if values weren't - * const. - */ - pixman_contract_from_float (argb8_pixels, (argb_t *)values, width); - - image->store_scanline_32 (image, x, y, width, argb8_pixels); - - free (argb8_pixels); -} - -static void -fetch_scanline_generic_float (bits_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - image->fetch_scanline_32 (image, x, y, width, buffer, NULL); - - pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width); -} - -/* The 32_sRGB paths should be deleted after narrow processing - * is no longer invoked for formats that are considered wide. - * (Also see fetch_pixel_generic_lossy_32) */ -static void -fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, - int x, - int y, - int width, - uint32_t *buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits + y * image->rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - uint32_t tmp; - - while (pixel < end) - { - uint32_t a, r, g, b; - - tmp = READ (image, pixel++); - - a = (tmp >> 24) & 0xff; - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_linear[r] * 255.0f + 0.5f; - g = to_linear[g] * 255.0f + 0.5f; - b = to_linear[b] * 255.0f + 0.5f; - - *buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0); - } -} - -static void -fetch_scanline_r8g8b8_32_sRGB (bits_image_t *image, - int x, - int y, - int width, - uint32_t *buffer, - const uint32_t *mask) -{ - const uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride) + 3 * x; - uint32_t tmp; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - tmp = FETCH_24 (image, bits, i); - - a = 0xff; - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_linear[r] * 255.0f + 0.5f; - g = to_linear[g] * 255.0f + 0.5f; - b = to_linear[b] * 255.0f + 0.5f; - - *buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0); - } -} - -static uint32_t -fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t tmp = READ (image, bits + offset); - uint32_t a, r, g, b; - - a = (tmp >> 24) & 0xff; - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_linear[r] * 255.0f + 0.5f; - g = to_linear[g] * 255.0f + 0.5f; - b = to_linear[b] * 255.0f + 0.5f; - - return (a << 24) | (r << 16) | (g << 8) | (b << 0); -} - -static uint32_t -fetch_pixel_r8g8b8_32_sRGB (bits_image_t *image, - int offset, - int line) -{ - uint8_t *bits = (uint8_t *)(image->bits + line * image->rowstride); - uint32_t tmp = FETCH_24 (image, bits, offset); - uint32_t a, r, g, b; - - a = 0xff; - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_linear[r] * 255.0f + 0.5f; - g = to_linear[g] * 255.0f + 0.5f; - b = to_linear[b] * 255.0f + 0.5f; - - return (a << 24) | (r << 16) | (g << 8) | (b << 0); -} - -static void -store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint64_t *values = (uint64_t *)v; - uint32_t *pixel = bits + x; - uint64_t tmp; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - tmp = values[i]; - - a = (tmp >> 24) & 0xff; - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_srgb (r * (1/255.0f)); - g = to_srgb (g * (1/255.0f)); - b = to_srgb (b * (1/255.0f)); - - WRITE (image, pixel++, a | (r << 16) | (g << 8) | (b << 0)); - } -} - -static void -store_scanline_r8g8b8_32_sRGB (bits_image_t *image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint8_t *bits = (uint8_t *)(image->bits + image->rowstride * y) + 3 * x; - uint64_t *values = (uint64_t *)v; - uint64_t tmp; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t r, g, b; - - tmp = values[i]; - - r = (tmp >> 16) & 0xff; - g = (tmp >> 8) & 0xff; - b = (tmp >> 0) & 0xff; - - r = to_srgb (r * (1/255.0f)); - g = to_srgb (g * (1/255.0f)); - b = to_srgb (b * (1/255.0f)); - - STORE_24 (image, bits, i, (r << 16) | (g << 8) | (b << 0)); - } -} - -static argb_t -fetch_pixel_generic_float (bits_image_t *image, - int offset, - int line) -{ - uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line); - argb_t f; - - pixman_expand_to_float (&f, &pixel32, image->format, 1); - - return f; -} - -/* - * XXX: The transformed fetch path only works at 32-bpp so far. When all - * paths have wide versions, this can be removed. - * - * WARNING: This function loses precision! - */ -static uint32_t -fetch_pixel_generic_lossy_32 (bits_image_t *image, - int offset, - int line) -{ - argb_t pixel64 = image->fetch_pixel_float (image, offset, line); - uint32_t result; - - pixman_contract_from_float (&result, &pixel64, 1); - - return result; -} - -typedef struct -{ - pixman_format_code_t format; - fetch_scanline_t fetch_scanline_32; - fetch_scanline_t fetch_scanline_float; - fetch_pixel_32_t fetch_pixel_32; - fetch_pixel_float_t fetch_pixel_float; - store_scanline_t store_scanline_32; - store_scanline_t store_scanline_float; -} format_info_t; - -#define FORMAT_INFO(format) \ - { \ - PIXMAN_ ## format, \ - fetch_scanline_ ## format, \ - fetch_scanline_generic_float, \ - fetch_pixel_ ## format, \ - fetch_pixel_generic_float, \ - store_scanline_ ## format, \ - store_scanline_generic_float \ - } - -static const format_info_t accessors[] = -{ -/* 32 bpp formats */ - FORMAT_INFO (a8r8g8b8), - FORMAT_INFO (x8r8g8b8), - FORMAT_INFO (a8b8g8r8), - FORMAT_INFO (x8b8g8r8), - FORMAT_INFO (b8g8r8a8), - FORMAT_INFO (b8g8r8x8), - FORMAT_INFO (r8g8b8a8), - FORMAT_INFO (r8g8b8x8), - FORMAT_INFO (x14r6g6b6), - -/* sRGB formats */ - { PIXMAN_a8r8g8b8_sRGB, - fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float, - fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float, - store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float, - }, - { PIXMAN_r8g8b8_sRGB, - fetch_scanline_r8g8b8_32_sRGB, fetch_scanline_r8g8b8_sRGB_float, - fetch_pixel_r8g8b8_32_sRGB, fetch_pixel_r8g8b8_sRGB_float, - store_scanline_r8g8b8_32_sRGB, store_scanline_r8g8b8_sRGB_float, - }, - -/* 24bpp formats */ - FORMAT_INFO (r8g8b8), - FORMAT_INFO (b8g8r8), - -/* 16bpp formats */ - FORMAT_INFO (r5g6b5), - FORMAT_INFO (b5g6r5), - - FORMAT_INFO (a1r5g5b5), - FORMAT_INFO (x1r5g5b5), - FORMAT_INFO (a1b5g5r5), - FORMAT_INFO (x1b5g5r5), - FORMAT_INFO (a4r4g4b4), - FORMAT_INFO (x4r4g4b4), - FORMAT_INFO (a4b4g4r4), - FORMAT_INFO (x4b4g4r4), - -/* 8bpp formats */ - FORMAT_INFO (a8), - FORMAT_INFO (r3g3b2), - FORMAT_INFO (b2g3r3), - FORMAT_INFO (a2r2g2b2), - FORMAT_INFO (a2b2g2r2), - - FORMAT_INFO (c8), - - FORMAT_INFO (g8), - -#define fetch_scanline_x4c4 fetch_scanline_c8 -#define fetch_pixel_x4c4 fetch_pixel_c8 -#define store_scanline_x4c4 store_scanline_c8 - FORMAT_INFO (x4c4), - -#define fetch_scanline_x4g4 fetch_scanline_g8 -#define fetch_pixel_x4g4 fetch_pixel_g8 -#define store_scanline_x4g4 store_scanline_g8 - FORMAT_INFO (x4g4), - - FORMAT_INFO (x4a4), - -/* 4bpp formats */ - FORMAT_INFO (a4), - FORMAT_INFO (r1g2b1), - FORMAT_INFO (b1g2r1), - FORMAT_INFO (a1r1g1b1), - FORMAT_INFO (a1b1g1r1), - - FORMAT_INFO (c4), - - FORMAT_INFO (g4), - -/* 1bpp formats */ - FORMAT_INFO (a1), - FORMAT_INFO (g1), - -/* Wide formats */ -#ifndef PIXMAN_FB_ACCESSORS - { PIXMAN_rgba_float, - NULL, fetch_scanline_rgbaf_float, - fetch_pixel_generic_lossy_32, fetch_pixel_rgbaf_float, - NULL, store_scanline_rgbaf_float }, - - { PIXMAN_rgb_float, - NULL, fetch_scanline_rgbf_float, - fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float, - NULL, store_scanline_rgbf_float }, -#endif - - { PIXMAN_a2r10g10b10, - NULL, fetch_scanline_a2r10g10b10_float, - fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float, - NULL, store_scanline_a2r10g10b10_float }, - - { PIXMAN_x2r10g10b10, - NULL, fetch_scanline_x2r10g10b10_float, - fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float, - NULL, store_scanline_x2r10g10b10_float }, - - { PIXMAN_a2b10g10r10, - NULL, fetch_scanline_a2b10g10r10_float, - fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float, - NULL, store_scanline_a2b10g10r10_float }, - - { PIXMAN_x2b10g10r10, - NULL, fetch_scanline_x2b10g10r10_float, - fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float, - NULL, store_scanline_x2b10g10r10_float }, - -/* YUV formats */ - { PIXMAN_yuy2, - fetch_scanline_yuy2, fetch_scanline_generic_float, - fetch_pixel_yuy2, fetch_pixel_generic_float, - NULL, NULL }, - - { PIXMAN_yv12, - fetch_scanline_yv12, fetch_scanline_generic_float, - fetch_pixel_yv12, fetch_pixel_generic_float, - NULL, NULL }, - - { PIXMAN_null }, -}; - -static void -setup_accessors (bits_image_t *image) -{ - const format_info_t *info = accessors; - - while (info->format != PIXMAN_null) - { - if (info->format == image->format) - { - image->fetch_scanline_32 = info->fetch_scanline_32; - image->fetch_scanline_float = info->fetch_scanline_float; - image->fetch_pixel_32 = info->fetch_pixel_32; - image->fetch_pixel_float = info->fetch_pixel_float; - image->store_scanline_32 = info->store_scanline_32; - image->store_scanline_float = info->store_scanline_float; - - return; - } - - info++; - } -} - -#ifndef PIXMAN_FB_ACCESSORS -void -_pixman_bits_image_setup_accessors_accessors (bits_image_t *image); - -void -_pixman_bits_image_setup_accessors (bits_image_t *image) -{ - if (image->read_func || image->write_func) - _pixman_bits_image_setup_accessors_accessors (image); - else - setup_accessors (image); -} - -#else - -void -_pixman_bits_image_setup_accessors_accessors (bits_image_t *image) -{ - setup_accessors (image); -} - -#endif diff --git a/vendor/pixman/pixman/pixman-accessor.h b/vendor/pixman/pixman/pixman-accessor.h deleted file mode 100644 index 8e0b03621..000000000 --- a/vendor/pixman/pixman/pixman-accessor.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef PIXMAN_FB_ACCESSORS - -#define READ(img, ptr) \ - (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr)))) -#define WRITE(img, ptr,val) \ - (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr)))) - -#define MEMSET_WRAPPED(img, dst, val, size) \ - do { \ - size_t _i; \ - uint8_t *_dst = (uint8_t*)(dst); \ - for(_i = 0; _i < (size_t) size; _i++) { \ - WRITE((img), _dst +_i, (val)); \ - } \ - } while (0) - -#else - -#define READ(img, ptr) (*(ptr)) -#define WRITE(img, ptr, val) (*(ptr) = (val)) -#define MEMSET_WRAPPED(img, dst, val, size) \ - memset(dst, val, size) - -#endif - diff --git a/vendor/pixman/pixman/pixman-arm-asm.h b/vendor/pixman/pixman/pixman-arm-asm.h deleted file mode 100644 index ee7854108..000000000 --- a/vendor/pixman/pixman/pixman-arm-asm.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * Copyright © 2010 Nokia Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ - -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm diff --git a/vendor/pixman/pixman/pixman-arm-common.h b/vendor/pixman/pixman/pixman-arm-common.h deleted file mode 100644 index 953768830..000000000 --- a/vendor/pixman/pixman/pixman-arm-common.h +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright © 2010 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -#ifndef PIXMAN_ARM_COMMON_H -#define PIXMAN_ARM_COMMON_H - -#include "pixman-inlines.h" - -/* Define some macros which can expand into proxy functions between - * ARM assembly optimized functions and the rest of pixman fast path API. - * - * All the low level ARM assembly functions have to use ARM EABI - * calling convention and take up to 8 arguments: - * width, height, dst, dst_stride, src, src_stride, mask, mask_stride - * - * The arguments are ordered with the most important coming first (the - * first 4 arguments are passed to function in registers, the rest are - * on stack). The last arguments are optional, for example if the - * function is not using mask, then 'mask' and 'mask_stride' can be - * omitted when doing a function call. - * - * Arguments 'src' and 'mask' contain either a pointer to the top left - * pixel of the composited rectangle or a pixel color value depending - * on the function type. In the case of just a color value (solid source - * or mask), the corresponding stride argument is unused. - */ - -#define SKIP_ZERO_SRC 1 -#define SKIP_ZERO_MASK 2 - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line; \ - int32_t dst_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src, \ - int32_t unused, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line; \ - mask_type *mask_line; \ - int32_t dst_stride, mask_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src, 0, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - uint32_t mask); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - uint32_t mask; \ - \ - mask = _pixman_image_get_solid ( \ - imp, mask_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_MASK) && mask == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ - src_type, src_cnt, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line; \ - src_type *src_line; \ - mask_type *mask_line; \ - int32_t dst_stride, src_stride, mask_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x, \ - max_vx); \ -} \ - \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, COVER) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, NONE) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, PAD) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, NORMAL) - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - const uint8_t * mask); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ - dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x, \ - max_vx, \ - mask); \ -} \ - \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, PAD, TRUE, FALSE) \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, NORMAL, TRUE, FALSE) - -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) - -/*****************************************************************************/ - -#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst_type * dst, \ - const src_type * top, \ - const src_type * bottom, \ - int wt, \ - int wb, \ - pixman_fixed_t x, \ - pixman_fixed_t ux, \ - int width); \ - \ -static force_inline void \ -scaled_bilinear_scanline_##cputype##_##name##_##op ( \ - dst_type * dst, \ - const uint32_t * mask, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ -} \ - \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, NORMAL, \ - FLAG_NONE) - - -#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst_type * dst, \ - const uint8_t * mask, \ - const src_type * top, \ - const src_type * bottom, \ - int wt, \ - int wb, \ - pixman_fixed_t x, \ - pixman_fixed_t ux, \ - int width); \ - \ -static force_inline void \ -scaled_bilinear_scanline_##cputype##_##name##_##op ( \ - dst_type * dst, \ - const uint8_t * mask, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ -} \ - \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, COVER, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, NONE, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, PAD, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, NORMAL, \ - FLAG_HAVE_NON_SOLID_MASK) - - -#endif diff --git a/vendor/pixman/pixman/pixman-arm-detect-win32.asm b/vendor/pixman/pixman/pixman-arm-detect-win32.asm deleted file mode 100644 index 8f5d5eb2a..000000000 --- a/vendor/pixman/pixman/pixman-arm-detect-win32.asm +++ /dev/null @@ -1,21 +0,0 @@ - area pixman_msvc, code, readonly - - export pixman_msvc_try_arm_simd_op - -pixman_msvc_try_arm_simd_op - ;; I don't think the msvc arm asm knows how to do SIMD insns - ;; uqadd8 r3,r3,r3 - dcd 0xe6633f93 - mov pc,lr - endp - - export pixman_msvc_try_arm_neon_op - -pixman_msvc_try_arm_neon_op - ;; I don't think the msvc arm asm knows how to do NEON insns - ;; veor d0,d0,d0 - dcd 0xf3000110 - mov pc,lr - endp - - end diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S deleted file mode 100644 index 0fd92d61c..000000000 --- a/vendor/pixman/pixman/pixman-arm-neon-asm-bilinear.S +++ /dev/null @@ -1,1358 +0,0 @@ -/* - * Copyright © 2011 SCore Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - * Author: Taekyun Kim (tkq.kim@samsung.com) - */ - -/* - * This file contains scaled bilinear scanline functions implemented - * using older siarhei's bilinear macro template. - * - * << General scanline function procedures >> - * 1. bilinear interpolate source pixels - * 2. load mask pixels - * 3. load destination pixels - * 4. duplicate mask to fill whole register - * 5. interleave source & destination pixels - * 6. apply mask to source pixels - * 7. combine source & destination pixels - * 8, Deinterleave final result - * 9. store destination pixels - * - * All registers with single number (i.e. src0, tmp0) are 64-bits registers. - * Registers with double numbers(src01, dst01) are 128-bits registers. - * All temp registers can be used freely outside the code block. - * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. - * - * Remarks - * There can be lots of pipeline stalls inside code block and between code blocks. - * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined (__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - -.text -.fpu neon -.arch armv7a -.object_arch armv4 -.eabi_attribute 10, 0 -.eabi_attribute 12, 0 -.arm -.altmacro -.p2align 2 - -#include "pixman-private.h" -#include "pixman-arm-asm.h" -#include "pixman-arm-neon-asm.h" - -/* - * Bilinear macros from pixman-arm-neon-asm.S - */ - -/* - * Bilinear scaling support code which tries to provide pixel fetching, color - * format conversion, and interpolation as separate macros which can be used - * as the basic building blocks for constructing bilinear scanline functions. - */ - -.macro bilinear_load_8888 reg1, reg2, tmp - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - vld1.32 {reg1}, [TMP1], STRIDE - vld1.32 {reg2}, [TMP1] -.endm - -.macro bilinear_load_0565 reg1, reg2, tmp - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - vld1.32 {reg2[0]}, [TMP1], STRIDE - vld1.32 {reg2[1]}, [TMP1] - convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp -.endm - -.macro bilinear_load_and_vertical_interpolate_two_8888 \ - acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 - - bilinear_load_8888 reg1, reg2, tmp1 - vmull.u8 acc1, reg1, d28 - vmlal.u8 acc1, reg2, d29 - bilinear_load_8888 reg3, reg4, tmp2 - vmull.u8 acc2, reg3, d28 - vmlal.u8 acc2, reg4, d29 -.endm - -.macro bilinear_load_and_vertical_interpolate_four_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - bilinear_load_and_vertical_interpolate_two_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi - bilinear_load_and_vertical_interpolate_two_8888 \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi -.endm - -.macro bilinear_load_and_vertical_interpolate_two_0565 \ - acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {acc2lo[0]}, [TMP1], STRIDE - vld1.32 {acc2hi[0]}, [TMP2], STRIDE - vld1.32 {acc2lo[1]}, [TMP1] - vld1.32 {acc2hi[1]}, [TMP2] - convert_0565_to_x888 acc2, reg3, reg2, reg1 - vzip.u8 reg1, reg3 - vzip.u8 reg2, reg4 - vzip.u8 reg3, reg4 - vzip.u8 reg1, reg2 - vmull.u8 acc1, reg1, d28 - vmlal.u8 acc1, reg2, d29 - vmull.u8 acc2, reg3, d28 - vmlal.u8 acc2, reg4, d29 -.endm - -.macro bilinear_load_and_vertical_interpolate_four_0565 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {xacc2lo[0]}, [TMP1], STRIDE - vld1.32 {xacc2hi[0]}, [TMP2], STRIDE - vld1.32 {xacc2lo[1]}, [TMP1] - vld1.32 {xacc2hi[1]}, [TMP2] - convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {yacc2lo[0]}, [TMP1], STRIDE - vzip.u8 xreg1, xreg3 - vld1.32 {yacc2hi[0]}, [TMP2], STRIDE - vzip.u8 xreg2, xreg4 - vld1.32 {yacc2lo[1]}, [TMP1] - vzip.u8 xreg3, xreg4 - vld1.32 {yacc2hi[1]}, [TMP2] - vzip.u8 xreg1, xreg2 - convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 - vmull.u8 xacc1, xreg1, d28 - vzip.u8 yreg1, yreg3 - vmlal.u8 xacc1, xreg2, d29 - vzip.u8 yreg2, yreg4 - vmull.u8 xacc2, xreg3, d28 - vzip.u8 yreg3, yreg4 - vmlal.u8 xacc2, xreg4, d29 - vzip.u8 yreg1, yreg2 - vmull.u8 yacc1, yreg1, d28 - vmlal.u8 yacc1, yreg2, d29 - vmull.u8 yacc2, yreg3, d28 - vmlal.u8 yacc2, yreg4, d29 -.endm - -.macro bilinear_store_8888 numpix, tmp1, tmp2 -.if numpix == 4 - vst1.32 {d0, d1}, [OUT]! -.elseif numpix == 2 - vst1.32 {d0}, [OUT]! -.elseif numpix == 1 - vst1.32 {d0[0]}, [OUT, :32]! -.else - .error bilinear_store_8888 numpix is unsupported -.endif -.endm - -.macro bilinear_store_0565 numpix, tmp1, tmp2 - vuzp.u8 d0, d1 - vuzp.u8 d2, d3 - vuzp.u8 d1, d3 - vuzp.u8 d0, d2 - convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 -.if numpix == 4 - vst1.16 {d2}, [OUT]! -.elseif numpix == 2 - vst1.32 {d2[0]}, [OUT]! -.elseif numpix == 1 - vst1.16 {d2[0]}, [OUT]! -.else - .error bilinear_store_0565 numpix is unsupported -.endif -.endm - - -/* - * Macros for loading mask pixels into register 'mask'. - * vdup must be done in somewhere else. - */ -.macro bilinear_load_mask_x numpix, mask -.endm - -.macro bilinear_load_mask_8 numpix, mask -.if numpix == 4 - vld1.32 {mask[0]}, [MASK]! -.elseif numpix == 2 - vld1.16 {mask[0]}, [MASK]! -.elseif numpix == 1 - vld1.8 {mask[0]}, [MASK]! -.else - .error bilinear_load_mask_8 numpix is unsupported -.endif - pld [MASK, #prefetch_offset] -.endm - -.macro bilinear_load_mask mask_fmt, numpix, mask - bilinear_load_mask_&mask_fmt numpix, mask -.endm - - -/* - * Macros for loading destination pixels into register 'dst0' and 'dst1'. - * Interleave should be done somewhere else. - */ -.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.if numpix == 4 - vld1.32 {dst0, dst1}, [OUT] -.elseif numpix == 2 - vld1.32 {dst0}, [OUT] -.elseif numpix == 1 - vld1.32 {dst0[0]}, [OUT] -.else - .error bilinear_load_dst_8888 numpix is unsupported -.endif - pld [OUT, #(prefetch_offset * 4)] -.endm - -.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 - bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 - bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01 - bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01 -.endm - -/* - * Macros for duplicating partially loaded mask to fill entire register. - * We will apply mask to interleaved source pixels, that is - * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) - * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) - * So, we need to duplicate loaded mask into whole register. - * - * For two pixel case - * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) - * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) - * We can do some optimizations for this including last pixel cases. - */ -.macro bilinear_duplicate_mask_x numpix, mask -.endm - -.macro bilinear_duplicate_mask_8 numpix, mask -.if numpix == 4 - vdup.32 mask, mask[0] -.elseif numpix == 2 - vdup.16 mask, mask[0] -.elseif numpix == 1 - vdup.8 mask, mask[0] -.else - .error bilinear_duplicate_mask_8 is unsupported -.endif -.endm - -.macro bilinear_duplicate_mask mask_fmt, numpix, mask - bilinear_duplicate_mask_&mask_fmt numpix, mask -.endm - -/* - * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form. - * Interleave should be done when maks is enabled or operator is 'over'. - */ -.macro bilinear_interleave src0, src1, dst0, dst1 - vuzp.8 src0, src1 - vuzp.8 dst0, dst1 - vuzp.8 src0, src1 - vuzp.8 dst0, dst1 -.endm - -.macro bilinear_interleave_src_dst_x_src \ - numpix, src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_x_over \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, dst0, dst1 -.endm - -.macro bilinear_interleave_src_dst_x_add \ - numpix, src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_8_src \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, dst0, dst1 -.endm - -.macro bilinear_interleave_src_dst_8_over \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, dst0, dst1 -.endm - -.macro bilinear_interleave_src_dst_8_add \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, dst0, dst1 -.endm - -.macro bilinear_interleave_src_dst \ - mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave_src_dst_&mask_fmt&_&op \ - numpix, src0, src1, src01, dst0, dst1, dst01 -.endm - - -/* - * Macros for applying masks to src pixels. (see combine_mask_u() function) - * src, dst should be in interleaved form. - * mask register should be in form (m0, m1, m2, m3). - */ -.macro bilinear_apply_mask_to_src_x \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 -.endm - -.macro bilinear_apply_mask_to_src_8 \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 - - vmull.u8 tmp01, src0, mask - vmull.u8 tmp23, src1, mask - /* bubbles */ - vrshr.u16 tmp45, tmp01, #8 - vrshr.u16 tmp67, tmp23, #8 - /* bubbles */ - vraddhn.u16 src0, tmp45, tmp01 - vraddhn.u16 src1, tmp67, tmp23 -.endm - -.macro bilinear_apply_mask_to_src \ - mask_fmt, numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 - - bilinear_apply_mask_to_src_&mask_fmt \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 -.endm - - -/* - * Macros for combining src and destination pixels. - * Interleave or not is depending on operator 'op'. - */ -.macro bilinear_combine_src \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 -.endm - -.macro bilinear_combine_over \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - vdup.32 tmp8, src1[1] - /* bubbles */ - vmvn.8 tmp8, tmp8 - /* bubbles */ - vmull.u8 tmp01, dst0, tmp8 - /* bubbles */ - vmull.u8 tmp23, dst1, tmp8 - /* bubbles */ - vrshr.u16 tmp45, tmp01, #8 - vrshr.u16 tmp67, tmp23, #8 - /* bubbles */ - vraddhn.u16 dst0, tmp45, tmp01 - vraddhn.u16 dst1, tmp67, tmp23 - /* bubbles */ - vqadd.u8 src01, dst01, src01 -.endm - -.macro bilinear_combine_add \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - vqadd.u8 src01, dst01, src01 -.endm - -.macro bilinear_combine \ - op, numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - bilinear_combine_&op \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 -.endm - -/* - * Macros for final deinterleaving of destination pixels if needed. - */ -.macro bilinear_deinterleave numpix, dst0, dst1, dst01 - vuzp.8 dst0, dst1 - /* bubbles */ - vuzp.8 dst0, dst1 -.endm - -.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01 - bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01 -.endm - - -.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op - bilinear_load_&src_fmt d0, d1, d2 - bilinear_load_mask mask_fmt, 1, d4 - bilinear_load_dst dst_fmt, op, 1, d18, d19, q9 - vmull.u8 q1, d0, d28 - vmlal.u8 q1, d1, d29 - /* 5 cycles bubble */ - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - /* 5 cycles bubble */ - bilinear_duplicate_mask mask_fmt, 1, d4 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - /* 3 cycles bubble */ - vmovn.u16 d0, q0 - /* 1 cycle bubble */ - bilinear_interleave_src_dst \ - mask_fmt, op, 1, d0, d1, q0, d18, d19, q9 - bilinear_apply_mask_to_src \ - mask_fmt, 1, d0, d1, q0, d4, \ - q3, q8, q10, q11 - bilinear_combine \ - op, 1, d0, d1, q0, d18, d19, q9, \ - q3, q8, q10, q11, d5 - bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0 - bilinear_store_&dst_fmt 1, q2, q3 -.endm - -.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op - bilinear_load_and_vertical_interpolate_two_&src_fmt \ - q1, q11, d0, d1, d20, d21, d22, d23 - bilinear_load_mask mask_fmt, 2, d4 - bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - bilinear_duplicate_mask mask_fmt, 2, d4 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vmovn.u16 d0, q0 - bilinear_interleave_src_dst \ - mask_fmt, op, 2, d0, d1, q0, d18, d19, q9 - bilinear_apply_mask_to_src \ - mask_fmt, 2, d0, d1, q0, d4, \ - q3, q8, q10, q11 - bilinear_combine \ - op, 2, d0, d1, q0, d18, d19, q9, \ - q3, q8, q10, q11, d5 - bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0 - bilinear_store_&dst_fmt 2, q2, q3 -.endm - -.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op - bilinear_load_and_vertical_interpolate_four_&src_fmt \ - q1, q11, d0, d1, d20, d21, d22, d23 \ - q3, q9, d4, d5, d16, d17, d18, d19 - pld [TMP1, PF_OFFS] - sub TMP1, TMP1, STRIDE - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d6, d30 - vmlal.u16 q2, d7, d30 - vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS - bilinear_load_mask mask_fmt, 4, d22 - bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 - pld [TMP1, PF_OFFS] - vmlsl.u16 q8, d18, d31 - vmlal.u16 q8, d19, d31 - vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) - bilinear_duplicate_mask mask_fmt, 4, d22 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d0, q0 - vmovn.u16 d1, q2 - vadd.u16 q12, q12, q13 - bilinear_interleave_src_dst \ - mask_fmt, op, 4, d0, d1, q0, d2, d3, q1 - bilinear_apply_mask_to_src \ - mask_fmt, 4, d0, d1, q0, d22, \ - q3, q8, q9, q10 - bilinear_combine \ - op, 4, d0, d1, q0, d2, d3, q1, \ - q3, q8, q9, q10, d23 - bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0 - bilinear_store_&dst_fmt 4, q2, q3 -.endm - -.set BILINEAR_FLAG_USE_MASK, 1 -.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 - -/* - * Main template macro for generating NEON optimized bilinear scanline functions. - * - * Bilinear scanline generator macro take folling arguments: - * fname - name of the function to generate - * src_fmt - source color format (8888 or 0565) - * dst_fmt - destination color format (8888 or 0565) - * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes - * process_last_pixel - code block that interpolate one pixel and does not - * update horizontal weight - * process_two_pixels - code block that interpolate two pixels and update - * horizontal weight - * process_four_pixels - code block that interpolate four pixels and update - * horizontal weight - * process_pixblock_head - head part of middle loop - * process_pixblock_tail - tail part of middle loop - * process_pixblock_tail_head - tail_head of middle loop - * pixblock_size - number of pixels processed in a single middle loop - * prefetch_distance - prefetch in the source image by that many pixels ahead - */ - -.macro generate_bilinear_scanline_func \ - fname, \ - src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \ - bilinear_process_last_pixel, \ - bilinear_process_two_pixels, \ - bilinear_process_four_pixels, \ - bilinear_process_pixblock_head, \ - bilinear_process_pixblock_tail, \ - bilinear_process_pixblock_tail_head, \ - pixblock_size, \ - prefetch_distance, \ - flags - -pixman_asm_function fname -.if pixblock_size == 8 -.elseif pixblock_size == 4 -.else - .error unsupported pixblock size -.endif - -.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 - OUT .req r0 - TOP .req r1 - BOTTOM .req r2 - WT .req r3 - WB .req r4 - X .req r5 - UX .req r6 - WIDTH .req ip - TMP1 .req r3 - TMP2 .req r4 - PF_OFFS .req r7 - TMP3 .req r8 - TMP4 .req r9 - STRIDE .req r2 - - mov ip, sp - push {r4, r5, r6, r7, r8, r9} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WB, X, UX, WIDTH} -.else - OUT .req r0 - MASK .req r1 - TOP .req r2 - BOTTOM .req r3 - WT .req r4 - WB .req r5 - X .req r6 - UX .req r7 - WIDTH .req ip - TMP1 .req r4 - TMP2 .req r5 - PF_OFFS .req r8 - TMP3 .req r9 - TMP4 .req r10 - STRIDE .req r3 - - .set prefetch_offset, prefetch_distance - - mov ip, sp - push {r4, r5, r6, r7, r8, r9, r10, ip} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WT, WB, X, UX, WIDTH} -.endif - - mul PF_OFFS, PF_OFFS, UX - -.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 - vpush {d8-d15} -.endif - - sub STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 3f - - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - - /* ensure good destination alignment */ - cmp WIDTH, #1 - blt 0f - tst OUT, #(1 << dst_bpp_shift) - beq 0f - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - bilinear_process_last_pixel - sub WIDTH, WIDTH, #1 -0: - vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - - cmp WIDTH, #2 - blt 0f - tst OUT, #(1 << (dst_bpp_shift + 1)) - beq 0f - bilinear_process_two_pixels - sub WIDTH, WIDTH, #2 -0: -.if pixblock_size == 8 - cmp WIDTH, #4 - blt 0f - tst OUT, #(1 << (dst_bpp_shift + 2)) - beq 0f - bilinear_process_four_pixels - sub WIDTH, WIDTH, #4 -0: -.endif - subs WIDTH, WIDTH, #pixblock_size - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) - bilinear_process_pixblock_head - subs WIDTH, WIDTH, #pixblock_size - blt 5f -0: - bilinear_process_pixblock_tail_head - subs WIDTH, WIDTH, #pixblock_size - bge 0b -5: - bilinear_process_pixblock_tail -1: -.if pixblock_size == 8 - tst WIDTH, #4 - beq 2f - bilinear_process_four_pixels -2: -.endif - /* handle the remaining trailing pixels */ - tst WIDTH, #2 - beq 2f - bilinear_process_two_pixels -2: - tst WIDTH, #1 - beq 3f - bilinear_process_last_pixel -3: -.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 - vpop {d8-d15} -.endif - -.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 - pop {r4, r5, r6, r7, r8, r9} -.else - pop {r4, r5, r6, r7, r8, r9, r10, ip} -.endif - bx lr - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0 - .unreq MASK -.endif - -.endfunc - -.endm - -/* src_8888_8_8888 */ -.macro bilinear_src_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_head - bilinear_src_8888_8_8888_process_four_pixels -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_tail -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_tail_head - bilinear_src_8888_8_8888_process_pixblock_tail - bilinear_src_8888_8_8888_process_pixblock_head -.endm - -/* src_8888_8_0565 */ -.macro bilinear_src_8888_8_0565_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_head - bilinear_src_8888_8_0565_process_four_pixels -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_tail -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_tail_head - bilinear_src_8888_8_0565_process_pixblock_tail - bilinear_src_8888_8_0565_process_pixblock_head -.endm - -/* src_0565_8_x888 */ -.macro bilinear_src_0565_8_x888_process_last_pixel - bilinear_interpolate_last_pixel 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_two_pixels - bilinear_interpolate_two_pixels 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_four_pixels - bilinear_interpolate_four_pixels 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_head - bilinear_src_0565_8_x888_process_four_pixels -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_tail -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_tail_head - bilinear_src_0565_8_x888_process_pixblock_tail - bilinear_src_0565_8_x888_process_pixblock_head -.endm - -/* src_0565_8_0565 */ -.macro bilinear_src_0565_8_0565_process_last_pixel - bilinear_interpolate_last_pixel 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_two_pixels - bilinear_interpolate_two_pixels 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_four_pixels - bilinear_interpolate_four_pixels 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_head - bilinear_src_0565_8_0565_process_four_pixels -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_tail -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_tail_head - bilinear_src_0565_8_0565_process_pixblock_tail - bilinear_src_0565_8_0565_process_pixblock_head -.endm - -/* over_8888_8888 */ -.macro bilinear_over_8888_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_pixblock_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - - vld1.32 {d22}, [TMP1], STRIDE - vld1.32 {d23}, [TMP1] - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - vmull.u8 q8, d22, d28 - vmlal.u8 q8, d23, d29 - - vld1.32 {d22}, [TMP2], STRIDE - vld1.32 {d23}, [TMP2] - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmull.u8 q9, d22, d28 - vmlal.u8 q9, d23, d29 - - vld1.32 {d22}, [TMP3], STRIDE - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 -.endm - -.macro bilinear_over_8888_8888_process_pixblock_tail - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d2, d3}, [OUT, :128] - pld [OUT, #(prefetch_offset * 4)] - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d6, q0 - vmovn.u16 d7, q2 - vuzp.8 d6, d7 - vuzp.8 d2, d3 - vuzp.8 d6, d7 - vuzp.8 d2, d3 - vdup.32 d4, d7[1] - vmvn.8 d4, d4 - vmull.u8 q11, d2, d4 - vmull.u8 q2, d3, d4 - vrshr.u16 q1, q11, #8 - vrshr.u16 q10, q2, #8 - vraddhn.u16 d2, q1, q11 - vraddhn.u16 d3, q10, q2 - vqadd.u8 q3, q1, q3 - vuzp.8 d6, d7 - vuzp.8 d6, d7 - vadd.u16 q12, q12, q13 - vst1.32 {d6, d7}, [OUT, :128]! -.endm - -.macro bilinear_over_8888_8888_process_pixblock_tail_head - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - vmlsl.u16 q2, d20, d30 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d20}, [TMP1], STRIDE - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d2, d3}, [OUT, :128] - pld [OUT, PF_OFFS] - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d6, q0 - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vmovn.u16 d7, q2 - vld1.32 {d22}, [TMP3], STRIDE - vuzp.8 d6, d7 - vuzp.8 d2, d3 - vuzp.8 d6, d7 - vuzp.8 d2, d3 - vdup.32 d4, d7[1] - vld1.32 {d23}, [TMP3] - vmvn.8 d4, d4 - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vmull.u8 q11, d2, d4 - vmull.u8 q2, d3, d4 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d16, d30 - vrshr.u16 q1, q11, #8 - vmlal.u16 q0, d17, d30 - vrshr.u16 q8, q2, #8 - vraddhn.u16 d2, q1, q11 - vraddhn.u16 d3, q8, q2 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vqadd.u8 q3, q1, q3 - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vuzp.8 d6, d7 - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vuzp.8 d6, d7 - vmlsl.u16 q1, d18, d31 - vadd.u16 q12, q12, q13 - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vst1.32 {d6, d7}, [OUT, :128]! -.endm - -/* over_8888_8_8888 */ -.macro bilinear_over_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - vld1.32 {d0}, [TMP1], STRIDE - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vld1.32 {d1}, [TMP1] - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - vld1.32 {d2}, [TMP2], STRIDE - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vld1.32 {d3}, [TMP2] - vmull.u8 q2, d0, d28 - vmull.u8 q3, d2, d28 - vmlal.u8 q2, d1, d29 - vmlal.u8 q3, d3, d29 - vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS - vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d4, d30 - vmlsl.u16 q1, d6, d31 - vmlal.u16 q0, d5, d30 - vmlal.u16 q1, d7, d31 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d2}, [TMP3], STRIDE - vld1.32 {d3}, [TMP3] - pld [TMP4, PF_OFFS] - vld1.32 {d4}, [TMP4], STRIDE - vld1.32 {d5}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q3, d2, d28 - vmlal.u8 q3, d3, d29 - vmull.u8 q1, d4, d28 - vmlal.u8 q1, d5, d29 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22[0]}, [MASK]! - pld [MASK, #prefetch_offset] - vadd.u16 q12, q12, q13 - vmovn.u16 d16, q0 -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_tail - vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS - vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q9, d6, d30 - vmlsl.u16 q10, d2, d31 - vmlal.u16 q9, d7, d30 - vmlal.u16 q10, d3, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vdup.32 d22, d22[0] - vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d17, q9 - vld1.32 {d18, d19}, [OUT, :128] - pld [OUT, PF_OFFS] - vuzp.8 d16, d17 - vuzp.8 d18, d19 - vuzp.8 d16, d17 - vuzp.8 d18, d19 - vmull.u8 q10, d16, d22 - vmull.u8 q11, d17, d22 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 - vrshrn.u16 d16, q10, #8 - vrshrn.u16 d17, q11, #8 - vdup.32 d22, d17[1] - vmvn.8 d22, d22 - vmull.u8 q10, d18, d22 - vmull.u8 q11, d19, d22 - vrshr.u16 q9, q10, #8 - vrshr.u16 q0, q11, #8 - vraddhn.u16 d18, q9, q10 - vraddhn.u16 d19, q0, q11 - vqadd.u8 q9, q8, q9 - vuzp.8 d18, d19 - vuzp.8 d18, d19 - vst1.32 {d18, d19}, [OUT, :128]! -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_tail_head - vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d0}, [TMP1], STRIDE - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlsl.u16 q9, d6, d30 - vmlsl.u16 q10, d2, d31 - vld1.32 {d1}, [TMP1] - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - vmlal.u16 q9, d7, d30 - vmlal.u16 q10, d3, d31 - vld1.32 {d2}, [TMP2], STRIDE - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vld1.32 {d3}, [TMP2] - vdup.32 d22, d22[0] - vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - vmull.u8 q2, d0, d28 - vmull.u8 q3, d2, d28 - vmovn.u16 d17, q9 - vld1.32 {d18, d19}, [OUT, :128] - pld [OUT, #(prefetch_offset * 4)] - vmlal.u8 q2, d1, d29 - vmlal.u8 q3, d3, d29 - vuzp.8 d16, d17 - vuzp.8 d18, d19 - vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS - vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS - vuzp.8 d16, d17 - vuzp.8 d18, d19 - vmlsl.u16 q0, d4, d30 - vmlsl.u16 q1, d6, d31 - vmull.u8 q10, d16, d22 - vmull.u8 q11, d17, d22 - vmlal.u16 q0, d5, d30 - vmlal.u16 q1, d7, d31 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vrshrn.u16 d16, q10, #8 - vrshrn.u16 d17, q11, #8 - vld1.32 {d2}, [TMP3], STRIDE - vdup.32 d22, d17[1] - vld1.32 {d3}, [TMP3] - vmvn.8 d22, d22 - pld [TMP4, PF_OFFS] - vld1.32 {d4}, [TMP4], STRIDE - vmull.u8 q10, d18, d22 - vmull.u8 q11, d19, d22 - vld1.32 {d5}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q3, d2, d28 - vrshr.u16 q9, q10, #8 - vrshr.u16 q15, q11, #8 - vmlal.u8 q3, d3, d29 - vmull.u8 q1, d4, d28 - vraddhn.u16 d18, q9, q10 - vraddhn.u16 d19, q15, q11 - vmlal.u8 q1, d5, d29 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vqadd.u8 q9, q8, q9 - vld1.32 {d22[0]}, [MASK]! - vuzp.8 d18, d19 - vadd.u16 q12, q12, q13 - vuzp.8 d18, d19 - vmovn.u16 d16, q0 - vst1.32 {d18, d19}, [OUT, :128]! -.endm - -/* add_8888_8888 */ -.macro bilinear_add_8888_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_pixblock_head - bilinear_add_8888_8888_process_four_pixels -.endm - -.macro bilinear_add_8888_8888_process_pixblock_tail -.endm - -.macro bilinear_add_8888_8888_process_pixblock_tail_head - bilinear_add_8888_8888_process_pixblock_tail - bilinear_add_8888_8888_process_pixblock_head -.endm - -/* add_8888_8_8888 */ -.macro bilinear_add_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_head - bilinear_add_8888_8_8888_process_four_pixels -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_tail -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_tail_head - bilinear_add_8888_8_8888_process_pixblock_tail - bilinear_add_8888_8_8888_process_pixblock_head -.endm - - -/* Bilinear scanline functions */ -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_src_8888_8_8888_process_last_pixel, \ - bilinear_src_8888_8_8888_process_two_pixels, \ - bilinear_src_8888_8_8888_process_four_pixels, \ - bilinear_src_8888_8_8888_process_pixblock_head, \ - bilinear_src_8888_8_8888_process_pixblock_tail, \ - bilinear_src_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ - 8888, 0565, 2, 1, \ - bilinear_src_8888_8_0565_process_last_pixel, \ - bilinear_src_8888_8_0565_process_two_pixels, \ - bilinear_src_8888_8_0565_process_four_pixels, \ - bilinear_src_8888_8_0565_process_pixblock_head, \ - bilinear_src_8888_8_0565_process_pixblock_tail, \ - bilinear_src_8888_8_0565_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ - 0565, 8888, 1, 2, \ - bilinear_src_0565_8_x888_process_last_pixel, \ - bilinear_src_0565_8_x888_process_two_pixels, \ - bilinear_src_0565_8_x888_process_four_pixels, \ - bilinear_src_0565_8_x888_process_pixblock_head, \ - bilinear_src_0565_8_x888_process_pixblock_tail, \ - bilinear_src_0565_8_x888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ - 0565, 0565, 1, 1, \ - bilinear_src_0565_8_0565_process_last_pixel, \ - bilinear_src_0565_8_0565_process_two_pixels, \ - bilinear_src_0565_8_0565_process_four_pixels, \ - bilinear_src_0565_8_0565_process_pixblock_head, \ - bilinear_src_0565_8_0565_process_pixblock_tail, \ - bilinear_src_0565_8_0565_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_over_8888_8888_process_last_pixel, \ - bilinear_over_8888_8888_process_two_pixels, \ - bilinear_over_8888_8888_process_four_pixels, \ - bilinear_over_8888_8888_process_pixblock_head, \ - bilinear_over_8888_8888_process_pixblock_tail, \ - bilinear_over_8888_8888_process_pixblock_tail_head, \ - 4, 28, 0 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_over_8888_8_8888_process_last_pixel, \ - bilinear_over_8888_8_8888_process_two_pixels, \ - bilinear_over_8888_8_8888_process_four_pixels, \ - bilinear_over_8888_8_8888_process_pixblock_head, \ - bilinear_over_8888_8_8888_process_pixblock_tail, \ - bilinear_over_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_add_8888_8888_process_last_pixel, \ - bilinear_add_8888_8888_process_two_pixels, \ - bilinear_add_8888_8888_process_four_pixels, \ - bilinear_add_8888_8888_process_pixblock_head, \ - bilinear_add_8888_8888_process_pixblock_tail, \ - bilinear_add_8888_8888_process_pixblock_tail_head, \ - 4, 28, 0 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_add_8888_8_8888_process_last_pixel, \ - bilinear_add_8888_8_8888_process_two_pixels, \ - bilinear_add_8888_8_8888_process_four_pixels, \ - bilinear_add_8888_8_8888_process_pixblock_head, \ - bilinear_add_8888_8_8888_process_pixblock_tail, \ - bilinear_add_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm.S b/vendor/pixman/pixman/pixman-arm-neon-asm.S deleted file mode 100644 index 7e949a38f..000000000 --- a/vendor/pixman/pixman/pixman-arm-neon-asm.S +++ /dev/null @@ -1,3627 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains implementations of NEON optimized pixel processing - * functions. There is no full and detailed tutorial, but some functions - * (those which are exposing some new or interesting features) are - * extensively commented and can be used as examples. - * - * You may want to have a look at the comments for following functions: - * - pixman_composite_over_8888_0565_asm_neon - * - pixman_composite_over_n_8_0565_asm_neon - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ - .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ - .arm - .altmacro - .p2align 2 - -#include "pixman-private.h" -#include "pixman-arm-asm.h" -#include "pixman-arm-neon-asm.h" - -/* Global configuration options and preferences */ - -/* - * The code can optionally make use of unaligned memory accesses to improve - * performance of handling leading/trailing pixels for each scanline. - * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for - * example in linux if unaligned memory accesses are not configured to - * generate.exceptions. - */ -.set RESPECT_STRICT_ALIGNMENT, 1 - -/* - * Set default prefetch type. There is a choice between the following options: - * - * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work - * as NOP to workaround some HW bugs or for whatever other reason) - * - * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where - * advanced prefetch intruduces heavy overhead) - * - * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 - * which can run ARM and NEON instructions simultaneously so that extra ARM - * instructions do not add (many) extra cycles, but improve prefetch efficiency) - * - * Note: some types of function can't support advanced prefetch and fallback - * to simple one (those which handle 24bpp pixels) - */ -.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED - -/* Prefetch distance in pixels for simple prefetch */ -.set PREFETCH_DISTANCE_SIMPLE, 64 - -/* - * Implementation of pixman_composite_over_8888_0565_asm_neon - * - * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and - * performs OVER compositing operation. Function fast_composite_over_8888_0565 - * from pixman-fast-path.c does the same in C and can be used as a reference. - * - * First we need to have some NEON assembly code which can do the actual - * operation on the pixels and provide it to the template macro. - * - * Template macro quite conveniently takes care of emitting all the necessary - * code for memory reading and writing (including quite tricky cases of - * handling unaligned leading/trailing pixels), so we only need to deal with - * the data in NEON registers. - * - * NEON registers allocation in general is recommented to be the following: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) - * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) - * d28, d29, d30, d31 - place for storing the result (destination pixels) - * - * As can be seen above, four 64-bit NEON registers are used for keeping - * intermediate pixel data and up to 8 pixels can be processed in one step - * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). - * - * This particular function uses the following registers allocation: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5 - contain loaded destination pixels (they are needed) - * d28, d29 - place for storing the result (destination pixels) - */ - -/* - * Step one. We need to have some code to do some arithmetics on pixel data. - * This is implemented as a pair of macros: '*_head' and '*_tail'. When used - * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, - * perform all the needed calculations and write the result to {d28, d29}. - * The rationale for having two macros and not just one will be explained - * later. In practice, any single monolitic function which does the work can - * be split into two parts in any arbitrary way without affecting correctness. - * - * There is one special trick here too. Common template macro can optionally - * make our life a bit easier by doing R, G, B, A color components - * deinterleaving for 32bpp pixel formats (and this feature is used in - * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that - * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we - * actually use d0 register for blue channel (a vector of eight 8-bit - * values), d1 register for green, d2 for red and d3 for alpha. This - * simple conversion can be also done with a few NEON instructions: - * - * Packed to planar conversion: - * vuzp.8 d0, d1 - * vuzp.8 d2, d3 - * vuzp.8 d1, d3 - * vuzp.8 d0, d2 - * - * Planar to packed conversion: - * vzip.8 d0, d2 - * vzip.8 d1, d3 - * vzip.8 d2, d3 - * vzip.8 d0, d1 - * - * But pixel can be loaded directly in planar format using VLD4.8 NEON - * instruction. It is 1 cycle slower than VLD1.32, so this is not always - * desirable, that's why deinterleaving is optional. - * - * But anyway, here is the code: - */ -.macro pixman_composite_over_8888_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vmvn.8 d3, d3 /* invert source alpha */ - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_8888_0565_process_pixblock_tail - /* ... continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* - * OK, now we got almost everything that we need. Using the above two - * macros, the work can be done right. But now we want to optimize - * it a bit. ARM Cortex-A8 is an in-order core, and benefits really - * a lot from good code scheduling and software pipelining. - * - * Let's construct some code, which will run in the core main loop. - * Some pseudo-code of the main loop will look like this: - * head - * while (...) { - * tail - * head - * } - * tail - * - * It may look a bit weird, but this setup allows to hide instruction - * latencies better and also utilize dual-issue capability more - * efficiently (make pairs of load-store and ALU instructions). - * - * So what we need now is a '*_tail_head' macro, which will be used - * in the core main loop. A trivial straightforward implementation - * of this macro would look like this: - * - * pixman_composite_over_8888_0565_process_pixblock_tail - * vst1.16 {d28, d29}, [DST_W, :128]! - * vld1.16 {d4, d5}, [DST_R, :128]! - * vld4.32 {d0, d1, d2, d3}, [SRC]! - * pixman_composite_over_8888_0565_process_pixblock_head - * cache_preload 8, 8 - * - * Now it also got some VLD/VST instructions. We simply can't move from - * processing one block of pixels to the other one with just arithmetics. - * The previously processed data needs to be written to memory and new - * data needs to be fetched. Fortunately, this main loop does not deal - * with partial leading/trailing pixels and can load/store a full block - * of pixels in a bulk. Additionally, destination buffer is already - * 16 bytes aligned here (which is good for performance). - * - * New things here are DST_R, DST_W, SRC and MASK identifiers. These - * are the aliases for ARM registers which are used as pointers for - * accessing data. We maintain separate pointers for reading and writing - * destination buffer (DST_R and DST_W). - * - * Another new thing is 'cache_preload' macro. It is used for prefetching - * data into CPU L2 cache and improve performance when dealing with large - * images which are far larger than cache size. It uses one argument - * (actually two, but they need to be the same here) - number of pixels - * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some - * details about this macro. Moreover, if good performance is needed - * the code from this macro needs to be copied into '*_tail_head' macro - * and mixed with the rest of code for optimal instructions scheduling. - * We are actually doing it below. - * - * Now after all the explanations, here is the optimized code. - * Different instruction streams (originaling from '*_head', '*_tail' - * and 'cache_preload' macro) use different indentation levels for - * better readability. Actually taking the code from one of these - * indentation levels and ignoring a few VLD/VST instructions would - * result in exactly the code from '*_head', '*_tail' or 'cache_preload' - * macro! - */ - -#if 1 - -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - vqadd.u8 d16, d2, d20 - vld1.16 {d4, d5}, [DST_R, :128]! - vqadd.u8 q9, q0, q11 - vshrn.u16 d6, q2, #8 - fetch_src_pixblock - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vshll.u8 q14, d16, #8 - PF add PF_X, PF_X, #8 - vshll.u8 q8, d19, #8 - PF tst PF_CTL, #0xF - vsri.u8 d6, d6, #5 - PF addne PF_X, PF_X, #8 - vmvn.8 d3, d3 - PF subne PF_CTL, PF_CTL, #1 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - vmull.u8 q10, d3, d6 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vsri.u16 q14, q8, #5 - PF cmp PF_X, ORIG_W - vshll.u8 q9, d18, #8 - vrshr.u16 q13, q10, #8 - PF subge PF_X, PF_X, ORIG_W - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - PF subges PF_CTL, PF_CTL, #0x10 - vsri.u16 q14, q9, #11 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vraddhn.u16 d22, q12, q15 - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -#else - -/* If we did not care much about the performance, we would just use this... */ -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - pixman_composite_over_8888_0565_process_pixblock_tail - vst1.16 {d28, d29}, [DST_W, :128]! - vld1.16 {d4, d5}, [DST_R, :128]! - fetch_src_pixblock - pixman_composite_over_8888_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -#endif - -/* - * And now the final part. We are using 'generate_composite_function' macro - * to put all the stuff together. We are specifying the name of the function - * which we want to get, number of bits per pixel for the source, mask and - * destination (0 if unused, like mask in this case). Next come some bit - * flags: - * FLAG_DST_READWRITE - tells that the destination buffer is both read - * and written, for write-only buffer we would use - * FLAG_DST_WRITEONLY flag instead - * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data - * and separate color channels for 32bpp format. - * The next things are: - * - the number of pixels processed per iteration (8 in this case, because - * that's the maximum what can fit into four 64-bit NEON registers). - * - prefetch distance, measured in pixel blocks. In this case it is 5 times - * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal - * prefetch distance can be selected by running some benchmarks. - * - * After that we specify some macros, these are 'default_init', - * 'default_cleanup' here which are empty (but it is possible to have custom - * init/cleanup macros to be able to save/restore some extra NEON registers - * like d8-d15 or do anything else) followed by - * 'pixman_composite_over_8888_0565_process_pixblock_head', - * 'pixman_composite_over_8888_0565_process_pixblock_tail' and - * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' - * which we got implemented above. - * - * The last part is the NEON registers allocation scheme. - */ -generate_composite_function \ - pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_n_0565_process_pixblock_tail - /* ... continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_0565_process_pixblock_tail_head - pixman_composite_over_n_0565_process_pixblock_tail - vld1.16 {d4, d5}, [DST_R, :128]! - vst1.16 {d28, d29}, [DST_W, :128]! - pixman_composite_over_n_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] - vmvn.8 d3, d3 /* invert source alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_0565_init, \ - default_cleanup, \ - pixman_composite_over_n_0565_process_pixblock_head, \ - pixman_composite_over_n_0565_process_pixblock_tail, \ - pixman_composite_over_n_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_0565_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q14, d2, #8 - vshll.u8 q9, d0, #8 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail_head - vsri.u16 q14, q8, #5 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - fetch_src_pixblock - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vsri.u16 q14, q9, #11 - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vshll.u8 q14, d2, #8 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vshll.u8 q9, d0, #8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_src_0565_8888_process_pixblock_head - vshrn.u16 d30, q0, #8 - vshrn.u16 d29, q0, #3 - vsli.u16 q0, q0, #5 - vmov.u8 d31, #255 - vsri.u8 d30, d30, #5 - vsri.u8 d29, d29, #6 - vshrn.u16 d28, q0, #2 -.endm - -.macro pixman_composite_src_0565_8888_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_src_0565_8888_process_pixblock_tail_head - pixman_composite_src_0565_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - fetch_src_pixblock - pixman_composite_src_0565_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_process_pixblock_head - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #32 - PF tst PF_CTL, #0xF - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #32 - PF subne PF_CTL, PF_CTL, #1 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head - vmvn.8 d24, d3 /* get inverted alpha */ - /* do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8888_process_pixblock_head - pixman_composite_out_reverse_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_process_pixblock_head - /* deinterleaved source pixels in {d0, d1, d2, d3} */ - /* inverted alpha in {d24} */ - /* destination pixels in {d4, d5, d6, d7} */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q2, q10, #8 - vrshr.u16 q3, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q2, q10 - vraddhn.u16 d31, q3, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q2, q10, #8 - vrshr.u16 q3, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q2, q10 - vraddhn.u16 d31, q3, q11 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vqadd.u8 q14, q0, q14 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0x0F - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vqadd.u8 q15, q1, q15 - PF cmp PF_X, ORIG_W - vmull.u8 q8, d24, d4 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vmull.u8 q9, d24, d5 - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q10, d24, d6 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q11, d24, d7 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] - vmvn.8 d24, d3 /* get inverted alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -.macro pixman_composite_over_reverse_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d7[0]}, [DUMMY] - vdup.8 d4, d7[0] - vdup.8 d5, d7[1] - vdup.8 d6, d7[2] - vdup.8 d7, d7[3] -.endm - -generate_composite_function \ - pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_reverse_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 4, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8_0565_process_pixblock_head - vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vmull.u8 q8, d3, d6 /* now do alpha blending */ - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail - /* 3 cycle bubble (after vmull.u8) */ - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - vqadd.u8 d16, d2, d16 - /* 1 cycle bubble */ - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 /* convert to 16bpp */ - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - /* 1 cycle bubble */ - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head - vld1.16 {d4, d5}, [DST_R, :128]! - vshrn.u16 d6, q2, #8 - fetch_mask_pixblock - vshrn.u16 d7, q2, #3 - fetch_src_pixblock - vmull.u8 q6, d24, d10 - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - vqadd.u8 d16, d2, d16 - vmull.u8 q1, d24, d9 - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 - vmull.u8 q0, d24, d8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vmull.u8 q7, d24, d11 - vsri.u16 q14, q9, #11 - - cache_preload 8, 8 - - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vst1.16 {d28, d29}, [DST_W, :128]! - vmull.u8 q8, d3, d6 - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -/* - * This function needs a special initialization of solid mask. - * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET - * offset, split into color components and replicated in d8-d11 - * registers. Additionally, this function needs all the NEON registers, - * so it has to save d8-d15 registers which are callee saved according - * to ABI. These registers are restored from 'cleanup' macro. All the - * other NEON registers are caller saved, so can be clobbered freely - * without introducing any problems. - */ -.macro pixman_composite_over_n_8_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_0565_init, \ - pixman_composite_over_n_8_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d24[0]}, [DUMMY] - vdup.8 d24, d24[3] -.endm - -.macro pixman_composite_over_8888_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_0565_init, \ - pixman_composite_over_8888_n_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0565_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 16, 16 -.endm - -generate_composite_function \ - pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_0565_process_pixblock_head, \ - pixman_composite_src_0565_0565_process_pixblock_tail, \ - pixman_composite_src_0565_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail_head - vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #8 - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8_init, \ - pixman_composite_src_n_8_cleanup, \ - pixman_composite_src_n_8_process_pixblock_head, \ - pixman_composite_src_n_8_process_pixblock_tail, \ - pixman_composite_src_n_8_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_0565_init, \ - pixman_composite_src_n_0565_cleanup, \ - pixman_composite_src_n_0565_process_pixblock_head, \ - pixman_composite_src_n_0565_process_pixblock_tail, \ - pixman_composite_src_n_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8888_init, \ - pixman_composite_src_n_8888_cleanup, \ - pixman_composite_src_n_8888_process_pixblock_head, \ - pixman_composite_src_n_8888_process_pixblock_tail, \ - pixman_composite_src_n_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_8888_process_pixblock_head, \ - pixman_composite_src_8888_8888_process_pixblock_tail, \ - pixman_composite_src_8888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_x888_8888_process_pixblock_head - vorr q0, q0, q2 - vorr q1, q1, q2 -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - vorr q0, q0, q2 - vorr q1, q1, q2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_x888_8888_init - vmov.u8 q2, #0xFF - vshl.u32 q2, q2, #24 -.endm - -generate_composite_function \ - pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_x888_8888_init, \ - default_cleanup, \ - pixman_composite_src_x888_8888_process_pixblock_head, \ - pixman_composite_src_x888_8888_process_pixblock_tail, \ - pixman_composite_src_x888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_8888_process_pixblock_head - /* expecting solid source in {d0, d1, d2, d3} */ - /* mask is in d24 (d25, d26, d27 are unused) */ - - /* in */ - vmull.u8 q8, d24, d0 - vmull.u8 q9, d24, d1 - vmull.u8 q10, d24, d2 - vmull.u8 q11, d24, d3 - vrsra.u16 q8, q8, #8 - vrsra.u16 q9, q9, #8 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 -.endm - -.macro pixman_composite_src_n_8_8888_process_pixblock_tail - vrshrn.u16 d28, q8, #8 - vrshrn.u16 d29, q9, #8 - vrshrn.u16 d30, q10, #8 - vrshrn.u16 d31, q11, #8 -.endm - -.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head - fetch_mask_pixblock - PF add PF_X, PF_X, #8 - vrshrn.u16 d28, q8, #8 - PF tst PF_CTL, #0x0F - vrshrn.u16 d29, q9, #8 - PF addne PF_X, PF_X, #8 - vrshrn.u16 d30, q10, #8 - PF subne PF_CTL, PF_CTL, #1 - vrshrn.u16 d31, q11, #8 - PF cmp PF_X, ORIG_W - vmull.u8 q8, d24, d0 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] - vmull.u8 q9, d24, d1 - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q10, d24, d2 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q11, d24, d3 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vrsra.u16 q8, q8, #8 - vrsra.u16 q9, q9, #8 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 -.endm - -.macro pixman_composite_src_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_src_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_src_n_8_8888_init, \ - pixman_composite_src_n_8_8888_cleanup, \ - pixman_composite_src_n_8_8888_process_pixblock_head, \ - pixman_composite_src_n_8_8888_process_pixblock_tail, \ - pixman_composite_src_n_8_8888_process_pixblock_tail_head, \ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_8_process_pixblock_head - vmull.u8 q0, d24, d16 - vmull.u8 q1, d25, d16 - vmull.u8 q2, d26, d16 - vmull.u8 q3, d27, d16 - vrsra.u16 q0, q0, #8 - vrsra.u16 q1, q1, #8 - vrsra.u16 q2, q2, #8 - vrsra.u16 q3, q3, #8 -.endm - -.macro pixman_composite_src_n_8_8_process_pixblock_tail - vrshrn.u16 d28, q0, #8 - vrshrn.u16 d29, q1, #8 - vrshrn.u16 d30, q2, #8 - vrshrn.u16 d31, q3, #8 -.endm - -.macro pixman_composite_src_n_8_8_process_pixblock_tail_head - fetch_mask_pixblock - PF add PF_X, PF_X, #8 - vrshrn.u16 d28, q0, #8 - PF tst PF_CTL, #0x0F - vrshrn.u16 d29, q1, #8 - PF addne PF_X, PF_X, #8 - vrshrn.u16 d30, q2, #8 - PF subne PF_CTL, PF_CTL, #1 - vrshrn.u16 d31, q3, #8 - PF cmp PF_X, ORIG_W - vmull.u8 q0, d24, d16 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] - vmull.u8 q1, d25, d16 - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q2, d26, d16 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q3, d27, d16 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vrsra.u16 q0, q0, #8 - vrsra.u16 q1, q1, #8 - vrsra.u16 q2, q2, #8 - vrsra.u16 q3, q3, #8 -.endm - -.macro pixman_composite_src_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d16[0]}, [DUMMY] - vdup.8 d16, d16[3] -.endm - -.macro pixman_composite_src_n_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_src_n_8_8_init, \ - pixman_composite_src_n_8_8_cleanup, \ - pixman_composite_src_n_8_8_process_pixblock_head, \ - pixman_composite_src_n_8_8_process_pixblock_tail, \ - pixman_composite_src_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8888_process_pixblock_head - /* expecting deinterleaved source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24 (d25, d26, d27 are unused) */ - - /* in */ - vmull.u8 q6, d24, d8 - vmull.u8 q7, d24, d9 - vmull.u8 q8, d24, d10 - vmull.u8 q9, d24, d11 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vrshr.u16 q12, q8, #8 - vrshr.u16 q13, q9, #8 - vraddhn.u16 d0, q6, q10 - vraddhn.u16 d1, q7, q11 - vraddhn.u16 d2, q8, q12 - vraddhn.u16 d3, q9, q13 - vmvn.8 d25, d3 /* get inverted alpha */ - /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ - /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ - /* now do alpha blending */ - vmull.u8 q8, d25, d4 - vmull.u8 q9, d25, d5 - vmull.u8 q10, d25, d6 - vmull.u8 q11, d25, d7 -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q15, q9, #8 - fetch_mask_pixblock - vrshr.u16 q6, q10, #8 - PF add PF_X, PF_X, #8 - vrshr.u16 q7, q11, #8 - PF tst PF_CTL, #0x0F - vraddhn.u16 d28, q14, q8 - PF addne PF_X, PF_X, #8 - vraddhn.u16 d29, q15, q9 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d30, q6, q10 - PF cmp PF_X, ORIG_W - vraddhn.u16 d31, q7, q11 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vmull.u8 q6, d24, d8 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] - vmull.u8 q7, d24, d9 - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d24, d10 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d24, d11 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q14, q0, q14 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! - vqadd.u8 q15, q1, q15 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vrshr.u16 q12, q8, #8 - vrshr.u16 q13, q9, #8 - vraddhn.u16 d0, q6, q10 - vraddhn.u16 d1, q7, q11 - vraddhn.u16 d2, q8, q12 - vraddhn.u16 d3, q9, q13 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vmvn.8 d25, d3 - vmull.u8 q8, d25, d4 - vmull.u8 q9, d25, d5 - vmull.u8 q10, d25, d6 - vmull.u8 q11, d25, d7 -.endm - -.macro pixman_composite_over_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8888_init, \ - pixman_composite_over_n_8_8888_cleanup, \ - pixman_composite_over_n_8_8888_process_pixblock_head, \ - pixman_composite_over_n_8_8888_process_pixblock_tail, \ - pixman_composite_over_n_8_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8_process_pixblock_head - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d8 - vmull.u8 q6, d26, d8 - vmull.u8 q7, d27, d8 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 q12, q0 - vmvn.8 q13, q1 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8_process_pixblock_tail_head - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_n_8_8_process_pixblock_tail - fetch_mask_pixblock - cache_preload 32, 32 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - pixman_composite_over_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d8[0]}, [DUMMY] - vdup.8 d8, d8[3] -.endm - -.macro pixman_composite_over_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8_init, \ - pixman_composite_over_n_8_8_cleanup, \ - pixman_composite_over_n_8_8_process_pixblock_head, \ - pixman_composite_over_n_8_8_process_pixblock_tail, \ - pixman_composite_over_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {d8, d9, d10, d11} - * dest in {d4, d5, d6, d7 } - * mask in {d24, d25, d26, d27} - * output: updated src in {d0, d1, d2, d3 } - * updated mask in {d24, d25, d26, d3 } - */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d9 - vmull.u8 q6, d26, d10 - vmull.u8 q7, d27, d11 - vmull.u8 q9, d11, d25 - vmull.u8 q12, d11, d24 - vmull.u8 q13, d11, d26 - vrshr.u16 q8, q0, #8 - vrshr.u16 q10, q1, #8 - vrshr.u16 q11, q6, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q10 - vraddhn.u16 d2, q6, q11 - vrshr.u16 q11, q12, #8 - vrshr.u16 q8, q9, #8 - vrshr.u16 q6, q13, #8 - vrshr.u16 q10, q7, #8 - vraddhn.u16 d24, q12, q11 - vraddhn.u16 d25, q9, q8 - vraddhn.u16 d26, q13, q6 - vraddhn.u16 d3, q7, q10 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in {d28, d29, d30, d31} - */ - vmvn.8 q12, q12 - vmvn.8 d26, d26 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmvn.8 d27, d3 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - fetch_mask_pixblock - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - cache_preload 8, 8 - pixman_composite_over_n_8888_8888_ca_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_n_8888_8888_ca_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8888_8888_ca_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_8888_ca_init, \ - pixman_composite_over_n_8888_8888_ca_cleanup, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] - * mask in {d24, d25, d26} [B, G, R] - * output: updated src in {d0, d1, d2 } [B, G, R] - * updated mask in {d24, d25, d26} [B, G, R] - */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d9 - vmull.u8 q6, d26, d10 - vmull.u8 q9, d11, d25 - vmull.u8 q12, d11, d24 - vmull.u8 q13, d11, d26 - vrshr.u16 q8, q0, #8 - vrshr.u16 q10, q1, #8 - vrshr.u16 q11, q6, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q10 - vraddhn.u16 d2, q6, q11 - vrshr.u16 q11, q12, #8 - vrshr.u16 q8, q9, #8 - vrshr.u16 q6, q13, #8 - vraddhn.u16 d24, q12, q11 - vraddhn.u16 d25, q9, q8 - /* - * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - * and put data into d16 - blue, d17 - green, d18 - red - */ - vshrn.u16 d17, q2, #3 - vshrn.u16 d18, q2, #8 - vraddhn.u16 d26, q13, q6 - vsli.u16 q2, q2, #5 - vsri.u8 d18, d18, #5 - vsri.u8 d17, d17, #6 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in d16 - blue, d17 - green, d18 - red - */ - vmvn.8 q12, q12 - vshrn.u16 d16, q2, #2 - vmvn.8 d26, d26 - vmull.u8 q6, d16, d24 - vmull.u8 q7, d17, d25 - vmull.u8 q11, d18, d26 -.endm - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - vrshr.u16 q10, q6, #8 - vrshr.u16 q14, q7, #8 - vrshr.u16 q15, q11, #8 - vraddhn.u16 d16, q10, q6 - vraddhn.u16 d17, q14, q7 - vraddhn.u16 d18, q15, q11 - vqadd.u8 q8, q0, q8 - vqadd.u8 d18, d2, d18 - /* - * convert the results in d16, d17, d18 to r5g6b5 and store - * them into {d28, d29} - */ - vshll.u8 q14, d18, #8 - vshll.u8 q10, d17, #8 - vshll.u8 q15, d16, #8 - vsri.u16 q14, q10, #5 - vsri.u16 q14, q15, #11 -.endm - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head - fetch_mask_pixblock - vrshr.u16 q10, q6, #8 - vrshr.u16 q14, q7, #8 - vld1.16 {d4, d5}, [DST_R, :128]! - vrshr.u16 q15, q11, #8 - vraddhn.u16 d16, q10, q6 - vraddhn.u16 d17, q14, q7 - vraddhn.u16 d22, q15, q11 - /* process_pixblock_head */ - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] - * mask in {d24, d25, d26} [B, G, R] - * output: updated src in {d0, d1, d2 } [B, G, R] - * updated mask in {d24, d25, d26} [B, G, R] - */ - vmull.u8 q6, d26, d10 - vqadd.u8 q8, q0, q8 - vmull.u8 q0, d24, d8 - vqadd.u8 d22, d2, d22 - vmull.u8 q1, d25, d9 - /* - * convert the result in d16, d17, d22 to r5g6b5 and store - * it into {d28, d29} - */ - vshll.u8 q14, d22, #8 - vshll.u8 q10, d17, #8 - vshll.u8 q15, d16, #8 - vmull.u8 q9, d11, d25 - vsri.u16 q14, q10, #5 - vmull.u8 q12, d11, d24 - vmull.u8 q13, d11, d26 - vsri.u16 q14, q15, #11 - cache_preload 8, 8 - vrshr.u16 q8, q0, #8 - vrshr.u16 q10, q1, #8 - vrshr.u16 q11, q6, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q10 - vraddhn.u16 d2, q6, q11 - vrshr.u16 q11, q12, #8 - vrshr.u16 q8, q9, #8 - vrshr.u16 q6, q13, #8 - vraddhn.u16 d24, q12, q11 - vraddhn.u16 d25, q9, q8 - /* - * convert 8 r5g6b5 pixel data from {d4, d5} to planar - * 8-bit format and put data into d16 - blue, d17 - green, - * d18 - red - */ - vshrn.u16 d17, q2, #3 - vshrn.u16 d18, q2, #8 - vraddhn.u16 d26, q13, q6 - vsli.u16 q2, q2, #5 - vsri.u8 d17, d17, #6 - vsri.u8 d18, d18, #5 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in d16 - blue, d17 - green, d18 - red - */ - vmvn.8 q12, q12 - vshrn.u16 d16, q2, #2 - vmvn.8 d26, d26 - vmull.u8 q7, d17, d25 - vmull.u8 q6, d16, d24 - vmull.u8 q11, d18, d26 - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_n_8888_0565_ca_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8888_0565_ca_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_0565_ca_init, \ - pixman_composite_over_n_8888_0565_ca_cleanup, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_in_n_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* and destination data in {d4, d5, d6, d7} */ - vmull.u8 q8, d4, d3 - vmull.u8 q9, d5, d3 - vmull.u8 q10, d6, d3 - vmull.u8 q11, d7, d3 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q8, q14 - vraddhn.u16 d29, q9, q15 - vraddhn.u16 d30, q10, q12 - vraddhn.u16 d31, q11, q13 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail_head - pixman_composite_in_n_8_process_pixblock_tail - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - cache_preload 32, 32 - pixman_composite_in_n_8_process_pixblock_head - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_in_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_in_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_in_n_8_init, \ - pixman_composite_in_n_8_cleanup, \ - pixman_composite_in_n_8_process_pixblock_head, \ - pixman_composite_in_n_8_process_pixblock_tail, \ - pixman_composite_in_n_8_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -.macro pixman_composite_add_n_8_8_process_pixblock_head - /* expecting source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24, d25, d26, d27 */ - vmull.u8 q0, d24, d11 - vmull.u8 q1, d25, d11 - vmull.u8 q6, d26, d11 - vmull.u8 q7, d27, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_n_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_n_8_8_process_pixblock_tail_head - pixman_composite_add_n_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - cache_preload 32, 32 - pixman_composite_add_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_add_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8_init, \ - pixman_composite_add_n_8_8_cleanup, \ - pixman_composite_add_n_8_8_process_pixblock_head, \ - pixman_composite_add_n_8_8_process_pixblock_tail, \ - pixman_composite_add_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d24, d0 - vmull.u8 q9, d25, d1 - vmull.u8 q10, d26, d2 - vmull.u8 q11, d27, d3 - vrshr.u16 q0, q8, #8 - vrshr.u16 q1, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q12, q10 - vraddhn.u16 d3, q13, q11 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_8_8_8_process_pixblock_tail_head - pixman_composite_add_8_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - fetch_src_pixblock - cache_preload 32, 32 - pixman_composite_add_8_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_8_8_8_init -.endm - -.macro pixman_composite_add_8_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8_8_8_init, \ - pixman_composite_add_8_8_8_cleanup, \ - pixman_composite_add_8_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d27, d0 - vmull.u8 q9, d27, d1 - vmull.u8 q10, d27, d2 - vmull.u8 q11, d27, d3 - /* 1 cycle bubble */ - vrsra.u16 q8, q8, #8 - vrsra.u16 q9, q9, #8 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail - /* 2 cycle bubble */ - vrshrn.u16 d28, q8, #8 - vrshrn.u16 d29, q9, #8 - vrshrn.u16 d30, q10, #8 - vrshrn.u16 d31, q11, #8 - vqadd.u8 q14, q2, q14 - /* 1 cycle bubble */ - vqadd.u8 q15, q3, q15 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - vrshrn.u16 d28, q8, #8 - fetch_mask_pixblock - vrshrn.u16 d29, q9, #8 - vmull.u8 q8, d27, d0 - vrshrn.u16 d30, q10, #8 - vmull.u8 q9, d27, d1 - vrshrn.u16 d31, q11, #8 - vmull.u8 q10, d27, d2 - vqadd.u8 q14, q2, q14 - vmull.u8 q11, d27, d3 - vqadd.u8 q15, q3, q15 - vrsra.u16 q8, q8, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrsra.u16 q9, q9, #8 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vrsra.u16 q10, q10, #8 - - cache_preload 8, 8 - - vrsra.u16 q11, q11, #8 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -generate_composite_function \ - pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_add_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8888_init, \ - pixman_composite_add_n_8_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_8888_n_8888_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vld1.32 {d27[0]}, [DUMMY] - vdup.8 d27, d27[3] -.endm - -.macro pixman_composite_add_8888_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8888_n_8888_init, \ - pixman_composite_add_8888_n_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* solid mask is in d15 */ - - /* 'in' */ - vmull.u8 q8, d15, d3 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q13, q8, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d3, q8, q13 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 - vmvn.8 d24, d3 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_8888_process_pixblock_head - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_8888_n_8888_init - add DUMMY, sp, #48 - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_8888_n_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_8888_init, \ - pixman_composite_over_8888_n_8888_cleanup, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0888_process_pixblock_head -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail_head - vst3.8 {d0, d1, d2}, [DST_W]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0888_process_pixblock_head, \ - pixman_composite_src_0888_0888_process_pixblock_tail, \ - pixman_composite_src_0888_0888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_head - vswp d0, d2 -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head - vst4.8 {d0, d1, d2, d3}, [DST_W]! - fetch_src_pixblock - vswp d0, d2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_0888_8888_rev_init - veor d3, d3, d3 -.endm - -generate_composite_function \ - pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_0888_8888_rev_init, \ - default_cleanup, \ - pixman_composite_src_0888_8888_rev_process_pixblock_head, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q9, d2, #8 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail - vshll.u8 q14, d0, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head - vshll.u8 q14, d0, #8 - fetch_src_pixblock - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! - vshll.u8 q9, d2, #8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0565_rev_process_pixblock_head, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d30, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d30, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endm - -generate_composite_function \ - pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_pixbuf_8888_process_pixblock_head, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d28, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d28, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endm - -generate_composite_function \ - pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmvn.8 d7, d15 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vmull.u8 q8, d7, d4 - vmull.u8 q9, d7, d5 - vmull.u8 q13, d7, d6 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q13, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q13 - vqadd.u8 q0, q0, q14 - vqadd.u8 q1, q1, q15 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_over_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_over_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_0565_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_0565_n_0565_init, \ - pixman_composite_over_0565_n_0565_cleanup, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail - vqadd.u8 q0, q0, q2 - vqadd.u8 q1, q1, q3 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_add_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_add_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_add_0565_8_0565_process_pixblock_head, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q5, d6, d5, d4 - /* destination pixel data is in {d4, d5, d6, xx} */ - vmvn.8 d24, d15 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 -.endm - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vraddhn.u16 d0, q14, q8 - vraddhn.u16 d1, q15, q9 - vraddhn.u16 d2, q12, q10 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_0565_process_pixblock_tail - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_out_reverse_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8_0565_process_pixblock_head, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 15, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_8888_process_pixblock_head - /* src is in d0 */ - /* destination pixel data is in {d4, d5, d6, d7} */ - vmvn.8 d1, d0 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d1, d4 - vmull.u8 q9, d1, d5 - vmull.u8 q10, d1, d6 - vmull.u8 q11, d1, d7 -.endm - -.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - /* 32bpp result is in {d28, d29, d30, d31} */ -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_8888_process_pixblock_tail - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_out_reverse_8_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -/* - * Bilinear scaling support code which tries to provide pixel fetching, color - * format conversion, and interpolation as separate macros which can be used - * as the basic building blocks for constructing bilinear scanline functions. - */ - -.macro bilinear_load_8888 reg1, reg2, tmp - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - vld1.32 {reg1}, [TMP1], STRIDE - vld1.32 {reg2}, [TMP1] -.endm - -.macro bilinear_load_0565 reg1, reg2, tmp - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - vld1.32 {reg2[0]}, [TMP1], STRIDE - vld1.32 {reg2[1]}, [TMP1] - convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp -.endm - -.macro bilinear_load_and_vertical_interpolate_two_8888 \ - acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 - - bilinear_load_8888 reg1, reg2, tmp1 - vmull.u8 acc1, reg1, d28 - vmlal.u8 acc1, reg2, d29 - bilinear_load_8888 reg3, reg4, tmp2 - vmull.u8 acc2, reg3, d28 - vmlal.u8 acc2, reg4, d29 -.endm - -.macro bilinear_load_and_vertical_interpolate_four_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - bilinear_load_and_vertical_interpolate_two_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi - bilinear_load_and_vertical_interpolate_two_8888 \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi -.endm - -.macro bilinear_load_and_vertical_interpolate_two_0565 \ - acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {acc2lo[0]}, [TMP1], STRIDE - vld1.32 {acc2hi[0]}, [TMP2], STRIDE - vld1.32 {acc2lo[1]}, [TMP1] - vld1.32 {acc2hi[1]}, [TMP2] - convert_0565_to_x888 acc2, reg3, reg2, reg1 - vzip.u8 reg1, reg3 - vzip.u8 reg2, reg4 - vzip.u8 reg3, reg4 - vzip.u8 reg1, reg2 - vmull.u8 acc1, reg1, d28 - vmlal.u8 acc1, reg2, d29 - vmull.u8 acc2, reg3, d28 - vmlal.u8 acc2, reg4, d29 -.endm - -.macro bilinear_load_and_vertical_interpolate_four_0565 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {xacc2lo[0]}, [TMP1], STRIDE - vld1.32 {xacc2hi[0]}, [TMP2], STRIDE - vld1.32 {xacc2lo[1]}, [TMP1] - vld1.32 {xacc2hi[1]}, [TMP2] - convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #1 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #1 - vld1.32 {yacc2lo[0]}, [TMP1], STRIDE - vzip.u8 xreg1, xreg3 - vld1.32 {yacc2hi[0]}, [TMP2], STRIDE - vzip.u8 xreg2, xreg4 - vld1.32 {yacc2lo[1]}, [TMP1] - vzip.u8 xreg3, xreg4 - vld1.32 {yacc2hi[1]}, [TMP2] - vzip.u8 xreg1, xreg2 - convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 - vmull.u8 xacc1, xreg1, d28 - vzip.u8 yreg1, yreg3 - vmlal.u8 xacc1, xreg2, d29 - vzip.u8 yreg2, yreg4 - vmull.u8 xacc2, xreg3, d28 - vzip.u8 yreg3, yreg4 - vmlal.u8 xacc2, xreg4, d29 - vzip.u8 yreg1, yreg2 - vmull.u8 yacc1, yreg1, d28 - vmlal.u8 yacc1, yreg2, d29 - vmull.u8 yacc2, yreg3, d28 - vmlal.u8 yacc2, yreg4, d29 -.endm - -.macro bilinear_store_8888 numpix, tmp1, tmp2 -.if numpix == 4 - vst1.32 {d0, d1}, [OUT, :128]! -.elseif numpix == 2 - vst1.32 {d0}, [OUT, :64]! -.elseif numpix == 1 - vst1.32 {d0[0]}, [OUT, :32]! -.else - .error bilinear_store_8888 numpix is unsupported -.endif -.endm - -.macro bilinear_store_0565 numpix, tmp1, tmp2 - vuzp.u8 d0, d1 - vuzp.u8 d2, d3 - vuzp.u8 d1, d3 - vuzp.u8 d0, d2 - convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 -.if numpix == 4 - vst1.16 {d2}, [OUT, :64]! -.elseif numpix == 2 - vst1.32 {d2[0]}, [OUT, :32]! -.elseif numpix == 1 - vst1.16 {d2[0]}, [OUT, :16]! -.else - .error bilinear_store_0565 numpix is unsupported -.endif -.endm - -.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt - bilinear_load_&src_fmt d0, d1, d2 - vmull.u8 q1, d0, d28 - vmlal.u8 q1, d1, d29 - /* 5 cycles bubble */ - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - /* 5 cycles bubble */ - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - /* 3 cycles bubble */ - vmovn.u16 d0, q0 - /* 1 cycle bubble */ - bilinear_store_&dst_fmt 1, q2, q3 -.endm - -.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt - bilinear_load_and_vertical_interpolate_two_&src_fmt \ - q1, q11, d0, d1, d20, d21, d22, d23 - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vmovn.u16 d0, q0 - bilinear_store_&dst_fmt 2, q2, q3 -.endm - -.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt - bilinear_load_and_vertical_interpolate_four_&src_fmt \ - q1, q11, d0, d1, d20, d21, d22, d23 \ - q3, q9, d4, d5, d16, d17, d18, d19 - pld [TMP1, PF_OFFS] - sub TMP1, TMP1, STRIDE - vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d6, d30 - vmlal.u16 q2, d7, d30 - vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS - pld [TMP2, PF_OFFS] - vmlsl.u16 q8, d18, d31 - vmlal.u16 q8, d19, d31 - vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d0, q0 - vmovn.u16 d1, q2 - vadd.u16 q12, q12, q13 - bilinear_store_&dst_fmt 4, q2, q3 -.endm - -.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head -.else - bilinear_interpolate_four_pixels src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail -.endif -.endm - -.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head -.else - bilinear_interpolate_four_pixels src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head -.else - bilinear_interpolate_four_pixels_head src_fmt, dst_fmt - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail -.else - bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head -.else - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.endif -.endm - -.set BILINEAR_FLAG_UNROLL_4, 0 -.set BILINEAR_FLAG_UNROLL_8, 1 -.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 - -/* - * Main template macro for generating NEON optimized bilinear scanline - * functions. - * - * Bilinear scanline scaler macro template uses the following arguments: - * fname - name of the function to generate - * src_fmt - source color format (8888 or 0565) - * dst_fmt - destination color format (8888 or 0565) - * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes - * prefetch_distance - prefetch in the source image by that many - * pixels ahead - */ - -.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \ - src_bpp_shift, dst_bpp_shift, \ - prefetch_distance, flags - -pixman_asm_function fname - OUT .req r0 - TOP .req r1 - BOTTOM .req r2 - WT .req r3 - WB .req r4 - X .req r5 - UX .req r6 - WIDTH .req ip - TMP1 .req r3 - TMP2 .req r4 - PF_OFFS .req r7 - TMP3 .req r8 - TMP4 .req r9 - STRIDE .req r2 - - mov ip, sp - push {r4, r5, r6, r7, r8, r9} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WB, X, UX, WIDTH} - mul PF_OFFS, PF_OFFS, UX - -.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 - vpush {d8-d15} -.endif - - sub STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 3f - - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - - /* ensure good destination alignment */ - cmp WIDTH, #1 - blt 0f - tst OUT, #(1 << dst_bpp_shift) - beq 0f - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - bilinear_interpolate_last_pixel src_fmt, dst_fmt - sub WIDTH, WIDTH, #1 -0: - vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - - cmp WIDTH, #2 - blt 0f - tst OUT, #(1 << (dst_bpp_shift + 1)) - beq 0f - bilinear_interpolate_two_pixels src_fmt, dst_fmt - sub WIDTH, WIDTH, #2 -0: -.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0 -/*********** 8 pixels per iteration *****************/ - cmp WIDTH, #4 - blt 0f - tst OUT, #(1 << (dst_bpp_shift + 2)) - beq 0f - bilinear_interpolate_four_pixels src_fmt, dst_fmt - sub WIDTH, WIDTH, #4 -0: - subs WIDTH, WIDTH, #8 - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) - bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #8 - blt 5f -0: - bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #8 - bge 0b -5: - bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt -1: - tst WIDTH, #4 - beq 2f - bilinear_interpolate_four_pixels src_fmt, dst_fmt -2: -.else -/*********** 4 pixels per iteration *****************/ - subs WIDTH, WIDTH, #4 - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) - bilinear_interpolate_four_pixels_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #4 - blt 5f -0: - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #4 - bge 0b -5: - bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -1: -/****************************************************/ -.endif - /* handle the remaining trailing pixels */ - tst WIDTH, #2 - beq 2f - bilinear_interpolate_two_pixels src_fmt, dst_fmt -2: - tst WIDTH, #1 - beq 3f - bilinear_interpolate_last_pixel src_fmt, dst_fmt -3: -.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 - vpop {d8-d15} -.endif - pop {r4, r5, r6, r7, r8, r9} - bx lr - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.endfunc - -.endm - -/*****************************************************************************/ - -.set have_bilinear_interpolate_four_pixels_8888_8888, 1 - -.macro bilinear_interpolate_four_pixels_8888_8888_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - - vld1.32 {d22}, [TMP1], STRIDE - vld1.32 {d23}, [TMP1] - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - vmull.u8 q8, d22, d28 - vmlal.u8 q8, d23, d29 - - vld1.32 {d22}, [TMP2], STRIDE - vld1.32 {d23}, [TMP2] - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmull.u8 q9, d22, d28 - vmlal.u8 q9, d23, d29 - - vld1.32 {d22}, [TMP3], STRIDE - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 -.endm - -.macro bilinear_interpolate_four_pixels_8888_8888_tail - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d6, q0 - vmovn.u16 d7, q2 - vadd.u16 q12, q12, q13 - vst1.32 {d6, d7}, [OUT, :128]! -.endm - -.macro bilinear_interpolate_four_pixels_8888_8888_tail_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d20}, [TMP1], STRIDE - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vmovn.u16 d6, q0 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmovn.u16 d7, q2 - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vadd.u16 q12, q12, q13 - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vst1.32 {d6, d7}, [OUT, :128]! - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 -.endm - -/*****************************************************************************/ - -.set have_bilinear_interpolate_eight_pixels_8888_0565, 1 - -.macro bilinear_interpolate_eight_pixels_8888_0565_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vld1.32 {d20}, [TMP1], STRIDE - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vld1.32 {d22}, [TMP2], STRIDE - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vld1.32 {d22}, [TMP3], STRIDE - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d20}, [TMP1], STRIDE - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vmovn.u16 d8, q0 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmovn.u16 d9, q2 - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vadd.u16 q12, q12, q13 - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 -.endm - -.macro bilinear_interpolate_eight_pixels_8888_0565_tail - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vmovn.u16 d10, q0 - vmovn.u16 d11, q2 - vadd.u16 q12, q12, q13 - - vuzp.u8 d8, d9 - vuzp.u8 d10, d11 - vuzp.u8 d9, d11 - vuzp.u8 d8, d10 - vshll.u8 q6, d9, #8 - vshll.u8 q5, d10, #8 - vshll.u8 q7, d8, #8 - vsri.u16 q5, q6, #5 - vsri.u16 q5, q7, #11 - vst1.32 {d10, d11}, [OUT, :128]! -.endm - -.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vuzp.u8 d8, d9 - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d20}, [TMP1], STRIDE - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vmovn.u16 d10, q0 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmovn.u16 d11, q2 - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vadd.u16 q12, q12, q13 - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vuzp.u8 d10, d11 - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vmlsl.u16 q1, d18, d31 - - mov TMP1, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - mov TMP2, X, asr #16 - add X, X, UX - add TMP2, TOP, TMP2, asl #2 - vmlal.u16 q1, d19, d31 - vuzp.u8 d9, d11 - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS - vuzp.u8 d8, d10 - vmlsl.u16 q2, d20, d30 - vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS - vld1.32 {d20}, [TMP1], STRIDE - vmlsl.u16 q3, d22, d31 - vmlal.u16 q3, d23, d31 - vld1.32 {d21}, [TMP1] - vmull.u8 q8, d20, d28 - vmlal.u8 q8, d21, d29 - vshll.u8 q6, d9, #8 - vshll.u8 q5, d10, #8 - vshll.u8 q7, d8, #8 - vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) - vsri.u16 q5, q6, #5 - vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) - vsri.u16 q5, q7, #11 - vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) - vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) - vadd.u16 q12, q12, q13 - vld1.32 {d23}, [TMP2] - vmull.u8 q9, d22, d28 - mov TMP3, X, asr #16 - add X, X, UX - add TMP3, TOP, TMP3, asl #2 - mov TMP4, X, asr #16 - add X, X, UX - add TMP4, TOP, TMP4, asl #2 - vmlal.u8 q9, d23, d29 - vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) - vld1.32 {d23}, [TMP3] - vmull.u8 q10, d22, d28 - vmlal.u8 q10, d23, d29 - vmovn.u16 d8, q0 - vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS - vmovn.u16 d9, q2 - vmlsl.u16 q0, d16, d30 - vmlal.u16 q0, d17, d30 - pld [TMP4, PF_OFFS] - vld1.32 {d16}, [TMP4], STRIDE - vadd.u16 q12, q12, q13 - vld1.32 {d17}, [TMP4] - pld [TMP4, PF_OFFS] - vmull.u8 q11, d16, d28 - vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS - vst1.32 {d10, d11}, [OUT, :128]! - vmlsl.u16 q1, d18, d31 -.endm -/*****************************************************************************/ - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \ - 2, 2, 28, BILINEAR_FLAG_UNROLL_4 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \ - 2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \ - 1, 2, 28, BILINEAR_FLAG_UNROLL_4 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \ - 1, 1, 28, BILINEAR_FLAG_UNROLL_4 diff --git a/vendor/pixman/pixman/pixman-arm-neon-asm.h b/vendor/pixman/pixman/pixman-arm-neon-asm.h deleted file mode 100644 index bdcf6a9d4..000000000 --- a/vendor/pixman/pixman/pixman-arm-neon-asm.h +++ /dev/null @@ -1,1184 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains a macro ('generate_composite_function') which can - * construct 2D image processing functions, based on a common template. - * Any combinations of source, destination and mask images with 8bpp, - * 16bpp, 24bpp, 32bpp color formats are supported. - * - * This macro takes care of: - * - handling of leading and trailing unaligned pixels - * - doing most of the work related to L2 cache preload - * - encourages the use of software pipelining for better instructions - * scheduling - * - * The user of this macro has to provide some configuration parameters - * (bit depths for the images, prefetch distance, etc.) and a set of - * macros, which should implement basic code chunks responsible for - * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage - * examples. - * - * TODO: - * - try overlapped pixel method (from Ian Rickards) when processing - * exactly two blocks of pixels - * - maybe add an option to do reverse scanline processing - */ - -/* - * Bit flags for 'generate_composite_function' macro which are used - * to tune generated functions behavior. - */ -.set FLAG_DST_WRITEONLY, 0 -.set FLAG_DST_READWRITE, 1 -.set FLAG_DEINTERLEAVE_32BPP, 2 - -/* - * Offset in stack where mask and source pointer/stride can be accessed - * from 'init' macro. This is useful for doing special handling for solid mask. - */ -.set ARGS_STACK_OFFSET, 40 - -/* - * Constants for selecting preferable prefetch type. - */ -.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ -.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ -.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ - -/* - * Definitions of supplementary pixld/pixst macros (for partial load/store of - * pixel data). - */ - -.macro pixldst1 op, elem_size, reg1, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1}, [&mem_operand&]! -.endif -.endm - -.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1, d®2}, [&mem_operand&]! -.endif -.endm - -.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits -.if abits > 0 - op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]! -.endif -.endm - -.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits - op&.&elem_size {d®1[idx]}, [&mem_operand&]! -.endm - -.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand - op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]! -.endm - -.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand - op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]! -.endm - -.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits -.if numbytes == 32 - pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif numbytes == 16 - pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits -.elseif numbytes == 8 - pixldst1 op, elem_size, %(basereg+1), mem_operand, abits -.elseif numbytes == 4 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) - pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits - .elseif elem_size == 16 - pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits - .endif -.elseif numbytes == 2 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) - pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits - .endif -.elseif numbytes == 1 - pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixst numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixld_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixld numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -.macro pixst_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixst numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -/* - * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register - * aliases to be defined) - */ -.macro pixld1_s elem_size, reg1, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d®1&[0]}, [TMP1, :16] - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d®1&[1]}, [TMP2, :16] - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d®1&[2]}, [TMP1, :16] - vld1.16 {d®1&[3]}, [TMP2, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[0]}, [TMP1, :32] - vld1.32 {d®1&[1]}, [TMP2, :32] -.else - .error "unsupported" -.endif -.endm - -.macro pixld2_s elem_size, reg1, reg2, mem_operand -.if 0 /* elem_size == 32 */ - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - sub VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[0]}, [TMP1, :32] - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d®2&[0]}, [TMP2, :32] - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d®1&[1]}, [TMP1, :32] - vld1.32 {d®2&[1]}, [TMP2, :32] -.else - pixld1_s elem_size, reg1, mem_operand - pixld1_s elem_size, reg2, mem_operand -.endif -.endm - -.macro pixld0_s elem_size, reg1, idx, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d®1&[idx]}, [TMP1, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED - bpl 5b - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d®1&[idx]}, [TMP1, :32] -.endif -.endm - -.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand -.if numbytes == 32 - pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand - pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand - pixdeinterleave elem_size, %(basereg+4) -.elseif numbytes == 16 - pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand -.elseif numbytes == 8 - pixld1_s elem_size, %(basereg+1), mem_operand -.elseif numbytes == 4 - .if elem_size == 32 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .elseif elem_size == 16 - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 4, mem_operand - pixld0_s elem_size, %(basereg+0), 5, mem_operand - pixld0_s elem_size, %(basereg+0), 6, mem_operand - pixld0_s elem_size, %(basereg+0), 7, mem_operand - .endif -.elseif numbytes == 2 - .if elem_size == 16 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .endif -.elseif numbytes == 1 - pixld0_s elem_size, %(basereg+0), 1, mem_operand -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld_s numpix, bpp, basereg, mem_operand -.if bpp > 0 - pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand -.endif -.endm - -.macro vuzp8 reg1, reg2 - vuzp.8 d®1, d®2 -.endm - -.macro vzip8 reg1, reg2 - vzip.8 d®1, d®2 -.endm - -/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixdeinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vuzp8 %(basereg+0), %(basereg+1) - vuzp8 %(basereg+2), %(basereg+3) - vuzp8 %(basereg+1), %(basereg+3) - vuzp8 %(basereg+0), %(basereg+2) -.endif -.endm - -/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vzip8 %(basereg+0), %(basereg+2) - vzip8 %(basereg+1), %(basereg+3) - vzip8 %(basereg+2), %(basereg+3) - vzip8 %(basereg+0), %(basereg+1) -.endif -.endm - -/* - * This is a macro for implementing cache preload. The main idea is that - * cache preload logic is mostly independent from the rest of pixels - * processing code. It starts at the top left pixel and moves forward - * across pixels and can jump across scanlines. Prefetch distance is - * handled in an 'incremental' way: it starts from 0 and advances to the - * optimal distance over time. After reaching optimal prefetch distance, - * it is kept constant. There are some checks which prevent prefetching - * unneeded pixel lines below the image (but it still can prefetch a bit - * more data on the right side of the image - not a big issue and may - * be actually helpful when rendering text glyphs). Additional trick is - * the use of LDR instruction for prefetch instead of PLD when moving to - * the next line, the point is that we have a high chance of getting TLB - * miss in this case, and PLD would be useless. - * - * This sounds like it may introduce a noticeable overhead (when working with - * fully cached data). But in reality, due to having a separate pipeline and - * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can - * execute simultaneously with NEON and be completely shadowed by it. Thus - * we get no performance overhead at all (*). This looks like a very nice - * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in software - * for almost zero cost! - * - * (*) The overhead of the prefetcher is visible when running some trivial - * pixels processing like simple copy. Anyway, having prefetch is a must - * when working with the graphics data. - */ -.macro PF a, x:vararg -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) - a x -.endif -.endm - -.macro cache_preload std_increment, boost_increment -.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) -.if regs_shortage - PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ -.endif -.if std_increment != 0 - PF add PF_X, PF_X, #std_increment -.endif - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #boost_increment - PF subne PF_CTL, PF_CTL, #1 - PF cmp PF_X, ORIG_W -.if src_bpp_shift >= 0 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] -.endif -.if dst_r_bpp != 0 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] -.endif -.if mask_bpp_shift >= 0 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] -.endif - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 -.if src_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endif -.if dst_r_bpp != 0 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! -.endif -.if mask_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! -.endif -.endif -.endm - -.macro cache_preload_simple -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) -.if src_bpp > 0 - pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] -.endif -.if dst_r_bpp > 0 - pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] -.endif -.if mask_bpp > 0 - pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] -.endif -.endif -.endm - -.macro fetch_mask_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK -.endm - -/* - * Macro which is used to process leading pixels until destination - * pointer is properly aligned (at 16 bytes boundary). When destination - * buffer uses 16bpp format, this is unnecessary, or even pointless. - */ -.macro ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head -.if dst_w_bpp != 24 - tst DST_R, #0xF - beq 2f - -.irp lowbit, 1, 2, 4, 8, 16 -local skip1 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_R, #lowbit - beq 1f -.endif - pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC - pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK -.if dst_r_bpp > 0 - pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R -.else - add DST_R, DST_R, #lowbit -.endif - PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) - sub W, W, #(lowbit * 8 / dst_w_bpp) -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - process_pixblock_tail - - pixinterleave dst_w_bpp, dst_w_basereg -.irp lowbit, 1, 2, 4, 8, 16 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_W, #lowbit - beq 1f -.endif - pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W -1: -.endif -.endr -.endif -2: -.endm - -/* - * Special code for processing up to (pixblock_size - 1) remaining - * trailing pixels. As SIMD processing performs operation on - * pixblock_size pixels, anything smaller than this has to be loaded - * and stored in a special way. Loading and storing of pixel data is - * performed in such a way that we fill some 'slots' in the NEON - * registers (some slots naturally are unused), then perform compositing - * operation as usual. In the end, the data is taken from these 'slots' - * and saved to memory. - * - * cache_preload_flag - allows to suppress prefetch if - * set to 0 - * dst_aligned_flag - selects whether destination buffer - * is aligned - */ -.macro process_trailing_pixels cache_preload_flag, \ - dst_aligned_flag, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - tst W, #(pixblock_size - 1) - beq 2f -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f - pixld_src chunk_size, src_bpp, src_basereg, SRC - pixld chunk_size, mask_bpp, mask_basereg, MASK -.if dst_aligned_flag != 0 - pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.else - pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.endif -.if cache_preload_flag != 0 - PF add PF_X, PF_X, #chunk_size -.endif -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head -.if cache_preload_flag != 0 - cache_preload 0, pixblock_size - cache_preload_simple -.endif - process_pixblock_tail - pixinterleave dst_w_bpp, dst_w_basereg -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f -.if dst_aligned_flag != 0 - pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.else - pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.endif -1: -.endif -.endr -2: -.endm - -/* - * Macro, which performs all the needed operations to switch to the next - * scanline and start the next loop iteration unless all the scanlines - * are already processed. - */ -.macro advance_to_next_scanline start_of_loop_label -.if regs_shortage - ldrd W, [sp] /* load W and H (width and height) from stack */ -.else - mov W, ORIG_W -.endif - add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift -.if src_bpp != 0 - add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift -.endif -.if mask_bpp != 0 - add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift -.endif -.if (dst_w_bpp != 24) - sub DST_W, DST_W, W, lsl #dst_bpp_shift -.endif -.if (src_bpp != 24) && (src_bpp != 0) - sub SRC, SRC, W, lsl #src_bpp_shift -.endif -.if (mask_bpp != 24) && (mask_bpp != 0) - sub MASK, MASK, W, lsl #mask_bpp_shift -.endif - subs H, H, #1 - mov DST_R, DST_W -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.endif - bge start_of_loop_label -.endm - -/* - * Registers are allocated in the following way by default: - * d0, d1, d2, d3 - reserved for loading source pixel data - * d4, d5, d6, d7 - reserved for loading destination pixel data - * d24, d25, d26, d27 - reserved for loading mask pixel data - * d28, d29, d30, d31 - final destination pixel data for writeback to memory - */ -.macro generate_composite_function fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - prefetch_distance, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - - push {r4-r12, lr} /* save all registers */ - -/* - * Select prefetch type for this function. If prefetch distance is - * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch - * has to be used instead of ADVANCED. - */ - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT -.if prefetch_distance == 0 - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ - ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE -.endif - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - - .macro pixld_src x:vararg - pixld x - .endm - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm -/* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - H .req r1 /* height (is updated during processing) */ - DST_W .req r2 /* destination buffer pointer for writes */ - DST_STRIDE .req r3 /* destination image stride */ - SRC .req r4 /* source buffer pointer */ - SRC_STRIDE .req r5 /* source image stride */ - DST_R .req r6 /* destination buffer pointer for reads */ - - MASK .req r7 /* mask pointer */ - MASK_STRIDE .req r8 /* mask stride */ - - PF_CTL .req r9 /* combined lines counter and prefetch */ - /* distance increment counter */ - PF_X .req r10 /* pixel index in a scanline for current */ - /* pretetch position */ - PF_SRC .req r11 /* pointer to source scanline start */ - /* for prefetch purposes */ - PF_DST .req r12 /* pointer to destination scanline start */ - /* for prefetch purposes */ - PF_MASK .req r14 /* pointer to mask scanline start */ - /* for prefetch purposes */ -/* - * Check whether we have enough registers for all the local variables. - * If we don't have enough registers, original width and height are - * kept on top of stack (and 'regs_shortage' variable is set to indicate - * this for the rest of code). Even if there are enough registers, the - * allocation scheme may be a bit different depending on whether source - * or mask is not used. - */ -.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) - ORIG_W .req r10 /* saved original width */ - DUMMY .req r12 /* temporary register */ - .set regs_shortage, 0 -.elseif mask_bpp == 0 - ORIG_W .req r7 /* saved original width */ - DUMMY .req r8 /* temporary register */ - .set regs_shortage, 0 -.elseif src_bpp == 0 - ORIG_W .req r4 /* saved original width */ - DUMMY .req r5 /* temporary register */ - .set regs_shortage, 0 -.else - ORIG_W .req r1 /* saved original width */ - DUMMY .req r1 /* temporary register */ - .set regs_shortage, 1 -.endif - - .set mask_bpp_shift, -1 -.if src_bpp == 32 - .set src_bpp_shift, 2 -.elseif src_bpp == 24 - .set src_bpp_shift, 0 -.elseif src_bpp == 16 - .set src_bpp_shift, 1 -.elseif src_bpp == 8 - .set src_bpp_shift, 0 -.elseif src_bpp == 0 - .set src_bpp_shift, -1 -.else - .error "requested src bpp (src_bpp) is not supported" -.endif -.if mask_bpp == 32 - .set mask_bpp_shift, 2 -.elseif mask_bpp == 24 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 8 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 0 - .set mask_bpp_shift, -1 -.else - .error "requested mask bpp (mask_bpp) is not supported" -.endif -.if dst_w_bpp == 32 - .set dst_bpp_shift, 2 -.elseif dst_w_bpp == 24 - .set dst_bpp_shift, 0 -.elseif dst_w_bpp == 16 - .set dst_bpp_shift, 1 -.elseif dst_w_bpp == 8 - .set dst_bpp_shift, 0 -.else - .error "requested dst bpp (dst_w_bpp) is not supported" -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - -.if prefetch_distance < 0 || prefetch_distance > 15 - .error "invalid prefetch distance (prefetch_distance)" -.endif - -.if src_bpp > 0 - ldr SRC, [sp, #40] -.endif -.if mask_bpp > 0 - ldr MASK, [sp, #48] -.endif - PF mov PF_X, #0 -.if src_bpp > 0 - ldr SRC_STRIDE, [sp, #44] -.endif -.if mask_bpp > 0 - ldr MASK_STRIDE, [sp, #52] -.endif - mov DST_R, DST_W - -.if src_bpp == 24 - sub SRC_STRIDE, SRC_STRIDE, W - sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 -.endif -.if mask_bpp == 24 - sub MASK_STRIDE, MASK_STRIDE, W - sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 -.endif -.if dst_w_bpp == 24 - sub DST_STRIDE, DST_STRIDE, W - sub DST_STRIDE, DST_STRIDE, W, lsl #1 -.endif - -/* - * Setup advanced prefetcher initial state - */ - PF mov PF_SRC, SRC - PF mov PF_DST, DST_R - PF mov PF_MASK, MASK - /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ - PF mov PF_CTL, H, lsl #4 - PF add PF_CTL, #(prefetch_distance - 0x10) - - init -.if regs_shortage - push {r0, r1} -.endif - subs H, H, #1 -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.else - mov ORIG_W, W -.endif - blt 9f - cmp W, #(pixblock_size * 2) - blt 8f -/* - * This is the start of the pipelined loop, which if optimized for - * long scanlines - */ -0: - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - PF add PF_X, PF_X, #pixblock_size - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - subs W, W, #(pixblock_size * 2) - blt 2f -1: - process_pixblock_tail_head - cache_preload_simple - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W - - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 1, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 0b - -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ -/* - * This is the start of the loop, designed to process images with small width - * (less than pixblock_size * 2 pixels). In this case neither pipelining - * nor prefetch are used. - */ -8: - /* Process exactly pixblock_size pixels if needed */ - tst W, #pixblock_size - beq 1f - pixld pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - process_pixblock_tail - pixst pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -1: - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 8b -9: -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ - - .purgem fetch_src_pixblock - .purgem pixld_src - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq ORIG_W - .unreq W - .unreq H - .unreq SRC_STRIDE - .unreq DST_STRIDE - .unreq MASK_STRIDE - .unreq PF_CTL - .unreq PF_X - .unreq PF_SRC - .unreq PF_DST - .unreq PF_MASK - .unreq DUMMY - .endfunc -.endm - -/* - * A simplified variant of function generation template for a single - * scanline processing (for implementing pixman combine functions) - */ -.macro generate_composite_function_scanline use_nearest_scaling, \ - fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - -.if use_nearest_scaling != 0 - /* - * Assign symbolic names to registers for nearest scaling - */ - W .req r0 - DST_W .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - MASK .req lr - TMP1 .req r4 - TMP2 .req r5 - DST_R .req r6 - SRC_WIDTH_FIXED .req r7 - - .macro pixld_src x:vararg - pixld_s x - .endm - - ldr UNIT_X, [sp] - push {r4-r8, lr} - ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)] - .if mask_bpp != 0 - ldr MASK, [sp, #(24 + 8)] - .endif -.else - /* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - DST_W .req r1 /* destination buffer pointer for writes */ - SRC .req r2 /* source buffer pointer */ - DST_R .req ip /* destination buffer pointer for reads */ - MASK .req r3 /* mask pointer */ - - .macro pixld_src x:vararg - pixld x - .endm -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm - - init - mov DST_R, DST_W - - cmp W, #pixblock_size - blt 8f - - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - subs W, W, #pixblock_size - blt 7f - - /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - subs W, W, #pixblock_size - blt 2f -1: - process_pixblock_tail_head - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -7: - /* Process the remaining trailing pixels in the scanline (dst aligned) */ - process_trailing_pixels 0, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup -.if use_nearest_scaling != 0 - pop {r4-r8, pc} /* exit */ -.else - bx lr /* exit */ -.endif -8: - /* Process the remaining trailing pixels in the scanline (dst unaligned) */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup - -.if use_nearest_scaling != 0 - pop {r4-r8, pc} /* exit */ - - .unreq DST_R - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq DST_W - .unreq MASK - .unreq SRC_WIDTH_FIXED - -.else - bx lr /* exit */ - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq W -.endif - - .purgem fetch_src_pixblock - .purgem pixld_src - - .endfunc -.endm - -.macro generate_composite_function_single_scanline x:vararg - generate_composite_function_scanline 0, x -.endm - -.macro generate_composite_function_nearest_scanline x:vararg - generate_composite_function_scanline 1, x -.endm - -/* Default prologue/epilogue, nothing special needs to be done */ - -.macro default_init -.endm - -.macro default_cleanup -.endm - -/* - * Prologue/epilogue variant which additionally saves/restores d8-d15 - * registers (they need to be saved/restored by callee according to ABI). - * This is required if the code needs to use all the NEON registers. - */ - -.macro default_init_need_all_regs - vpush {d8-d15} -.endm - -.macro default_cleanup_need_all_regs - vpop {d8-d15} -.endm - -/******************************************************************************/ - -/* - * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in) - * into a planar a8r8g8b8 format (with a, r, g, b color components - * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). - * - * Warning: the conversion is destructive and the original - * value (in) is lost. - */ -.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b - vshrn.u16 out_r, in, #8 - vshrn.u16 out_g, in, #3 - vsli.u16 in, in, #5 - vmov.u8 out_a, #255 - vsri.u8 out_r, out_r, #5 - vsri.u8 out_g, out_g, #6 - vshrn.u16 out_b, in, #2 -.endm - -.macro convert_0565_to_x888 in, out_r, out_g, out_b - vshrn.u16 out_r, in, #8 - vshrn.u16 out_g, in, #3 - vsli.u16 in, in, #5 - vsri.u8 out_r, out_r, #5 - vsri.u8 out_g, out_g, #6 - vshrn.u16 out_b, in, #2 -.endm - -/* - * Conversion from planar a8r8g8b8 format (with a, r, g, b color components - * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6 - * pixels packed in 128-bit register (out). Requires two temporary 128-bit - * registers (tmp1, tmp2) - */ -.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 - vshll.u8 tmp1, in_g, #8 - vshll.u8 out, in_r, #8 - vshll.u8 tmp2, in_b, #8 - vsri.u16 out, tmp1, #5 - vsri.u16 out, tmp2, #11 -.endm - -/* - * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels - * returned in (out0, out1) registers pair. Requires one temporary - * 64-bit register (tmp). 'out1' and 'in' may overlap, the original - * value from 'in' is lost - */ -.macro convert_four_0565_to_x888_packed in, out0, out1, tmp - vshl.u16 out0, in, #5 /* G top 6 bits */ - vshl.u16 tmp, in, #11 /* B top 5 bits */ - vsri.u16 in, in, #5 /* R is ready in top bits */ - vsri.u16 out0, out0, #6 /* G is ready in top bits */ - vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ - vshr.u16 out1, in, #8 /* R is in place */ - vsri.u16 out0, tmp, #8 /* G & B is in place */ - vzip.u16 out0, out1 /* everything is in place */ -.endm diff --git a/vendor/pixman/pixman/pixman-arm-neon.c b/vendor/pixman/pixman/pixman-arm-neon.c deleted file mode 100644 index 103f1c2db..000000000 --- a/vendor/pixman/pixman/pixman-arm-neon.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of ARM Ltd not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. ARM Ltd makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ian Rickards (ian.rickards@arm.com) - * Author: Jonathan Morton (jonathan.morton@movial.com) - * Author: Markku Vire (markku.vire@movial.com) - * - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include "pixman-private.h" -#include "pixman-arm-common.h" - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, - uint8_t, 3, uint8_t, 3) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, - uint16_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, - uint8_t, 3, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, - uint8_t, 3, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, - uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, - uint8_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, - uint8_t, 1, uint8_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, - uint8_t, 1, uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, - uint32_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, - uint16_t, uint32_t) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, - OVER, uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, - OVER, uint16_t, uint16_t) - -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, - uint16_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, - uint16_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, - uint32_t, uint32_t) - -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC, - uint16_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC, - uint16_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD, - uint32_t, uint32_t) - -void -pixman_composite_src_n_8_asm_neon (int32_t w, - int32_t h, - uint8_t *dst, - int32_t dst_stride, - uint8_t src); - -void -pixman_composite_src_n_0565_asm_neon (int32_t w, - int32_t h, - uint16_t *dst, - int32_t dst_stride, - uint16_t src); - -void -pixman_composite_src_n_8888_asm_neon (int32_t w, - int32_t h, - uint32_t *dst, - int32_t dst_stride, - uint32_t src); - -static pixman_bool_t -arm_neon_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor) -{ - /* stride is always multiple of 32bit units in pixman */ - int32_t byte_stride = stride * sizeof(uint32_t); - - switch (bpp) - { - case 8: - pixman_composite_src_n_8_asm_neon ( - width, - height, - (uint8_t *)(((char *) bits) + y * byte_stride + x), - byte_stride, - _xor & 0xff); - return TRUE; - case 16: - pixman_composite_src_n_0565_asm_neon ( - width, - height, - (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), - byte_stride / 2, - _xor & 0xffff); - return TRUE; - case 32: - pixman_composite_src_n_8888_asm_neon ( - width, - height, - (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), - byte_stride / 4, - _xor); - return TRUE; - default: - return FALSE; - } -} - -static pixman_bool_t -arm_neon_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - if (src_bpp != dst_bpp) - return FALSE; - - switch (src_bpp) - { - case 16: - pixman_composite_src_0565_0565_asm_neon ( - width, height, - (uint16_t *)(((char *) dst_bits) + - dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, - (uint16_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 2), src_stride * 2); - return TRUE; - case 32: - pixman_composite_src_8888_8888_asm_neon ( - width, height, - (uint32_t *)(((char *) dst_bits) + - dest_y * dst_stride * 4 + dest_x * 4), dst_stride, - (uint32_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 4), src_stride); - return TRUE; - default: - return FALSE; - } -} - -static const pixman_fast_path_t arm_neon_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, neon_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), - - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, a8, x8r8g8b8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, x8r8g8b8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, x8b8g8r8, a8, x8b8g8r8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, x8b8g8r8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, x8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, solid, x8r8g8b8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, x8r8g8b8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, x8b8g8r8, solid, x8b8g8r8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, x8b8g8r8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, x8r8g8b8, null, x8r8g8b8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, x8r8g8b8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, x8b8g8r8, null, x8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, x8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, x8r8g8b8, neon_composite_out_reverse_8_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, x8b8g8r8, neon_composite_out_reverse_8_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), - - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), - SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), - - SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), - /* Note: NONE repeat is not supported yet */ - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), - - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (ADD, x8r8g8b8, x8r8g8b8, neon_8888_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), - - { PIXMAN_OP_NONE }, -}; - -#define BIND_COMBINE_U(name) \ -void \ -pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src, \ - const uint32_t *mask); \ - \ -void \ -pixman_composite_scanline_##name##_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src); \ - \ -static void \ -neon_combine_##name##_u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ -{ \ - if (mask) \ - pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ - src, mask); \ - else \ - pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ -} - -BIND_COMBINE_U (over) -BIND_COMBINE_U (add) -BIND_COMBINE_U (out_reverse) - -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, arm_neon_fast_paths); - - imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; - imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; - - imp->blt = arm_neon_blt; - imp->fill = arm_neon_fill; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S b/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S deleted file mode 100644 index e050292e0..000000000 --- a/vendor/pixman/pixman/pixman-arm-simd-asm-scaled.S +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * Copyright © 2010 Nokia Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ - -/* Prevent the stack from becoming executable */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .arch armv6 - .object_arch armv4 - .arm - .altmacro - .p2align 2 - -#include "pixman-arm-asm.h" - -/* - * Note: This code is only using armv5te instructions (not even armv6), - * but is scheduled for ARM Cortex-A8 pipeline. So it might need to - * be split into a few variants, tuned for each microarchitecture. - * - * TODO: In order to get good performance on ARM9/ARM11 cores (which don't - * have efficient write combining), it needs to be changed to use 16-byte - * aligned writes using STM instruction. - * - * Nearest scanline scaler macro template uses the following arguments: - * fname - name of the function to generate - * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes - * t - type suffix for LDR/STR instructions - * prefetch_distance - prefetch in the source image by that many - * pixels ahead - * prefetch_braking_distance - stop prefetching when that many pixels are - * remaining before the end of scanline - */ - -.macro generate_nearest_scanline_func fname, bpp_shift, t, \ - prefetch_distance, \ - prefetch_braking_distance - -pixman_asm_function fname - W .req r0 - DST .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - TMP1 .req r4 - TMP2 .req r5 - VXMASK .req r6 - PF_OFFS .req r7 - SRC_WIDTH_FIXED .req r8 - - ldr UNIT_X, [sp] - push {r4, r5, r6, r7, r8, r10} - mvn VXMASK, #((1 << bpp_shift) - 1) - ldr SRC_WIDTH_FIXED, [sp, #28] - - /* define helper macro */ - .macro scale_2_pixels - ldr&t TMP1, [SRC, TMP1] - and TMP2, VXMASK, VX, asr #(16 - bpp_shift) - adds VX, VX, UNIT_X - str&t TMP1, [DST], #(1 << bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - - ldr&t TMP2, [SRC, TMP2] - and TMP1, VXMASK, VX, asr #(16 - bpp_shift) - adds VX, VX, UNIT_X - str&t TMP2, [DST], #(1 << bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - .endm - - /* now do the scaling */ - and TMP1, VXMASK, VX, asr #(16 - bpp_shift) - adds VX, VX, UNIT_X -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - subs W, W, #(8 + prefetch_braking_distance) - blt 2f - /* calculate prefetch offset */ - mov PF_OFFS, #prefetch_distance - mla PF_OFFS, UNIT_X, PF_OFFS, VX -1: /* main loop, process 8 pixels per iteration with prefetch */ - pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] - add PF_OFFS, UNIT_X, lsl #3 - scale_2_pixels - scale_2_pixels - scale_2_pixels - scale_2_pixels - subs W, W, #8 - bge 1b -2: - subs W, W, #(4 - 8 - prefetch_braking_distance) - blt 2f -1: /* process the remaining pixels */ - scale_2_pixels - scale_2_pixels - subs W, W, #4 - bge 1b -2: - tst W, #2 - beq 2f - scale_2_pixels -2: - tst W, #1 - ldrne&t TMP1, [SRC, TMP1] - strne&t TMP1, [DST] - /* cleanup helper macro */ - .purgem scale_2_pixels - .unreq DST - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq VXMASK - .unreq PF_OFFS - .unreq SRC_WIDTH_FIXED - /* return */ - pop {r4, r5, r6, r7, r8, r10} - bx lr -.endfunc -.endm - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm.S b/vendor/pixman/pixman/pixman-arm-simd-asm.S deleted file mode 100644 index a74a0a8f3..000000000 --- a/vendor/pixman/pixman/pixman-arm-simd-asm.S +++ /dev/null @@ -1,1179 +0,0 @@ -/* - * Copyright © 2012 Raspberry Pi Foundation - * Copyright © 2012 RISC OS Open Ltd - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of the copyright holders not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. The copyright holders make no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ben Avison (bavison@riscosopen.org) - * - */ - -/* Prevent the stack from becoming executable */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .arch armv6 - .object_arch armv4 - .arm - .altmacro - .p2align 2 - -#include "pixman-arm-asm.h" -#include "pixman-arm-simd-asm.h" - -/* A head macro should do all processing which results in an output of up to - * 16 bytes, as far as the final load instruction. The corresponding tail macro - * should complete the processing of the up-to-16 bytes. The calling macro will - * sometimes choose to insert a preload or a decrement of X between them. - * cond ARM condition code for code block - * numbytes Number of output bytes that should be generated this time - * firstreg First WK register in which to place output - * unaligned_src Whether to use non-wordaligned loads of source image - * unaligned_mask Whether to use non-wordaligned loads of mask image - * preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output - */ - -.macro blit_init - line_saved_regs STRIDE_D, STRIDE_S -.endm - -.macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - pixld cond, numbytes, firstreg, SRC, unaligned_src -.endm - -.macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment - WK4 .req STRIDE_D - WK5 .req STRIDE_S - WK6 .req MASK - WK7 .req STRIDE_M -110: pixld , 16, 0, SRC, unaligned_src - pixld , 16, 4, SRC, unaligned_src - pld [SRC, SCRATCH] - pixst , 16, 0, DST - pixst , 16, 4, DST - subs X, X, #32*8/src_bpp - bhs 110b - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ - 4, /* prefetch distance */ \ - blit_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - blit_process_head, \ - nop_macro, /* process tail */ \ - blit_inner_loop - -generate_composite_function \ - pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ - 4, /* prefetch distance */ \ - blit_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - blit_process_head, \ - nop_macro, /* process tail */ \ - blit_inner_loop - -generate_composite_function \ - pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ - 3, /* prefetch distance */ \ - blit_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - blit_process_head, \ - nop_macro, /* process tail */ \ - blit_inner_loop - -/******************************************************************************/ - -.macro src_n_8888_init - ldr SRC, [sp, #ARGS_STACK_OFFSET] - mov STRIDE_S, SRC - mov MASK, SRC - mov STRIDE_M, SRC -.endm - -.macro src_n_0565_init - ldrh SRC, [sp, #ARGS_STACK_OFFSET] - orr SRC, SRC, lsl #16 - mov STRIDE_S, SRC - mov MASK, SRC - mov STRIDE_M, SRC -.endm - -.macro src_n_8_init - ldrb SRC, [sp, #ARGS_STACK_OFFSET] - orr SRC, SRC, lsl #8 - orr SRC, SRC, lsl #16 - mov STRIDE_S, SRC - mov MASK, SRC - mov STRIDE_M, SRC -.endm - -.macro fill_process_tail cond, numbytes, firstreg - WK4 .req SRC - WK5 .req STRIDE_S - WK6 .req MASK - WK7 .req STRIDE_M - pixst cond, numbytes, 4, DST - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ - 0, /* prefetch distance doesn't apply */ \ - src_n_8888_init \ - nop_macro, /* newline */ \ - nop_macro /* cleanup */ \ - nop_macro /* process head */ \ - fill_process_tail - -generate_composite_function \ - pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ - 0, /* prefetch distance doesn't apply */ \ - src_n_0565_init \ - nop_macro, /* newline */ \ - nop_macro /* cleanup */ \ - nop_macro /* process head */ \ - fill_process_tail - -generate_composite_function \ - pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ - 0, /* prefetch distance doesn't apply */ \ - src_n_8_init \ - nop_macro, /* newline */ \ - nop_macro /* cleanup */ \ - nop_macro /* process head */ \ - fill_process_tail - -/******************************************************************************/ - -.macro src_x888_8888_pixel, cond, reg - orr&cond WK®, WK®, #0xFF000000 -.endm - -.macro pixman_composite_src_x888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - pixld cond, numbytes, firstreg, SRC, unaligned_src -.endm - -.macro pixman_composite_src_x888_8888_process_tail cond, numbytes, firstreg - src_x888_8888_pixel cond, %(firstreg+0) - .if numbytes >= 8 - src_x888_8888_pixel cond, %(firstreg+1) - .if numbytes == 16 - src_x888_8888_pixel cond, %(firstreg+2) - src_x888_8888_pixel cond, %(firstreg+3) - .endif - .endif -.endm - -generate_composite_function \ - pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \ - 3, /* prefetch distance */ \ - nop_macro, /* init */ \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - pixman_composite_src_x888_8888_process_head, \ - pixman_composite_src_x888_8888_process_tail - -/******************************************************************************/ - -.macro src_0565_8888_init - /* Hold loop invariants in MASK and STRIDE_M */ - ldr MASK, =0x07E007E0 - mov STRIDE_M, #0xFF000000 - /* Set GE[3:0] to 1010 so SEL instructions do what we want */ - ldr SCRATCH, =0x80008000 - uadd8 SCRATCH, SCRATCH, SCRATCH -.endm - -.macro src_0565_8888_2pixels, reg1, reg2 - and SCRATCH, WK®1, MASK @ 00000GGGGGG0000000000gggggg00000 - bic WK®2, WK®1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb - orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg - mov WK®1, WK®2, lsl #16 @ rrrrr000000bbbbb0000000000000000 - mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG - bic WK®2, WK®2, WK®1, lsr #16 @ RRRRR000000BBBBB0000000000000000 - orr WK®1, WK®1, WK®1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000 - orr WK®2, WK®2, WK®2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000 - pkhtb WK®1, WK®1, WK®1, asr #5 @ rrrrrrrr--------bbbbbbbb-------- - sel WK®1, WK®1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb-------- - mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg - pkhtb WK®2, WK®2, WK®2, asr #5 @ RRRRRRRR--------BBBBBBBB-------- - sel WK®2, WK®2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB-------- - orr WK®1, STRIDE_M, WK®1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb - orr WK®2, STRIDE_M, WK®2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB -.endm - -/* This version doesn't need STRIDE_M, but is one instruction longer. - It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case? - and SCRATCH, WK®1, MASK @ 00000GGGGGG0000000000gggggg00000 - bic WK®1, WK®1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb - orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg - mov WK®2, WK®1, lsr #16 @ 0000000000000000RRRRR000000BBBBB - mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000 - bic WK®1, WK®1, WK®2, lsl #16 @ 0000000000000000rrrrr000000bbbbb - mov WK®2, WK®2, lsl #3 @ 0000000000000RRRRR000000BBBBB000 - mov WK®1, WK®1, lsl #3 @ 0000000000000rrrrr000000bbbbb000 - orr WK®2, WK®2, WK®2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB - orr WK®1, WK®1, WK®1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb - pkhbt WK®2, WK®2, WK®2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB - pkhbt WK®1, WK®1, WK®1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb - sel WK®2, SCRATCH, WK®2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB - sel WK®1, SCRATCH, WK®1 @ --------rrrrrrrrggggggggbbbbbbbb - orr WK®2, WK®2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB - orr WK®1, WK®1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb -*/ - -.macro src_0565_8888_1pixel, reg - bic SCRATCH, WK®, MASK @ 0000000000000000rrrrr000000bbbbb - and WK®, WK®, MASK @ 000000000000000000000gggggg00000 - mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000 - mov WK®, WK®, lsl #5 @ 0000000000000000gggggg0000000000 - orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb - orr WK®, WK®, WK®, lsr #6 @ 000000000000000gggggggggggg00000 - pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb - sel WK®, WK®, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb - orr WK®, WK®, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb -.endm - -.macro src_0565_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - .if numbytes == 16 - pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src - .elseif numbytes == 8 - pixld , 4, firstreg, SRC, unaligned_src - .elseif numbytes == 4 - pixld , 2, firstreg, SRC, unaligned_src - .endif -.endm - -.macro src_0565_8888_process_tail cond, numbytes, firstreg - .if numbytes == 16 - src_0565_8888_2pixels firstreg, %(firstreg+1) - src_0565_8888_2pixels %(firstreg+2), %(firstreg+3) - .elseif numbytes == 8 - src_0565_8888_2pixels firstreg, %(firstreg+1) - .else - src_0565_8888_1pixel firstreg - .endif -.endm - -generate_composite_function \ - pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \ - 3, /* prefetch distance */ \ - src_0565_8888_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - src_0565_8888_process_head, \ - src_0565_8888_process_tail - -/******************************************************************************/ - -.macro src_x888_0565_init - /* Hold loop invariant in MASK */ - ldr MASK, =0x001F001F - line_saved_regs STRIDE_S, ORIG_W -.endm - -.macro src_x888_0565_1pixel s, d - and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb - and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000 - orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb - orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb - /* Top 16 bits are discarded during the following STRH */ -.endm - -.macro src_x888_0565_2pixels slo, shi, d, tmp - and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000 - and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB - and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb - orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB - orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB - and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000 - orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb - orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb - pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb -.endm - -.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - WK4 .req STRIDE_S - WK5 .req STRIDE_M - WK6 .req WK3 - WK7 .req ORIG_W - .if numbytes == 16 - pixld , 16, 4, SRC, 0 - src_x888_0565_2pixels 4, 5, 0, 0 - pixld , 8, 4, SRC, 0 - src_x888_0565_2pixels 6, 7, 1, 1 - pixld , 8, 6, SRC, 0 - .else - pixld , numbytes*2, 4, SRC, 0 - .endif -.endm - -.macro src_x888_0565_process_tail cond, numbytes, firstreg - .if numbytes == 16 - src_x888_0565_2pixels 4, 5, 2, 2 - src_x888_0565_2pixels 6, 7, 3, 4 - .elseif numbytes == 8 - src_x888_0565_2pixels 4, 5, 1, 1 - src_x888_0565_2pixels 6, 7, 2, 2 - .elseif numbytes == 4 - src_x888_0565_2pixels 4, 5, 1, 1 - .else - src_x888_0565_1pixel 4, 1 - .endif - .if numbytes == 16 - pixst , numbytes, 0, DST - .else - pixst , numbytes, 1, DST - .endif - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -generate_composite_function \ - pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ - 3, /* prefetch distance */ \ - src_x888_0565_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - src_x888_0565_process_head, \ - src_x888_0565_process_tail - -/******************************************************************************/ - -.macro add_8_8_8pixels cond, dst1, dst2 - uqadd8&cond WK&dst1, WK&dst1, MASK - uqadd8&cond WK&dst2, WK&dst2, STRIDE_M -.endm - -.macro add_8_8_4pixels cond, dst - uqadd8&cond WK&dst, WK&dst, MASK -.endm - -.macro add_8_8_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - WK4 .req MASK - WK5 .req STRIDE_M - .if numbytes == 16 - pixld cond, 8, 4, SRC, unaligned_src - pixld cond, 16, firstreg, DST, 0 - add_8_8_8pixels cond, firstreg, %(firstreg+1) - pixld cond, 8, 4, SRC, unaligned_src - .else - pixld cond, numbytes, 4, SRC, unaligned_src - pixld cond, numbytes, firstreg, DST, 0 - .endif - .unreq WK4 - .unreq WK5 -.endm - -.macro add_8_8_process_tail cond, numbytes, firstreg - .if numbytes == 16 - add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3) - .elseif numbytes == 8 - add_8_8_8pixels cond, firstreg, %(firstreg+1) - .else - add_8_8_4pixels cond, firstreg - .endif -.endm - -generate_composite_function \ - pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \ - 2, /* prefetch distance */ \ - nop_macro, /* init */ \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - add_8_8_process_head, \ - add_8_8_process_tail - -/******************************************************************************/ - -.macro over_8888_8888_init - /* Hold loop invariant in MASK */ - ldr MASK, =0x00800080 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, MASK, MASK - line_saved_regs STRIDE_D, STRIDE_S, ORIG_W -.endm - -.macro over_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - WK4 .req STRIDE_D - WK5 .req STRIDE_S - WK6 .req STRIDE_M - WK7 .req ORIG_W - pixld , numbytes, %(4+firstreg), SRC, unaligned_src - pixld , numbytes, firstreg, DST, 0 - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -.macro over_8888_8888_check_transparent numbytes, reg0, reg1, reg2, reg3 - /* Since these colours a premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */ - teq WK®0, #0 - .if numbytes > 4 - teqeq WK®1, #0 - .if numbytes > 8 - teqeq WK®2, #0 - teqeq WK®3, #0 - .endif - .endif -.endm - -.macro over_8888_8888_prepare next - mov WK&next, WK&next, lsr #24 -.endm - -.macro over_8888_8888_1pixel src, dst, offset, next - /* src = destination component multiplier */ - rsb WK&src, WK&src, #255 - /* Split even/odd bytes of dst into SCRATCH/dst */ - uxtb16 SCRATCH, WK&dst - uxtb16 WK&dst, WK&dst, ror #8 - /* Multiply through, adding 0.5 to the upper byte of result for rounding */ - mla SCRATCH, SCRATCH, WK&src, MASK - mla WK&dst, WK&dst, WK&src, MASK - /* Where we would have had a stall between the result of the first MLA and the shifter input, - * reload the complete source pixel */ - ldr WK&src, [SRC, #offset] - /* Multiply by 257/256 to approximate 256/255 */ - uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 - /* In this stall, start processing the next pixel */ - .if offset < -4 - mov WK&next, WK&next, lsr #24 - .endif - uxtab16 WK&dst, WK&dst, WK&dst, ror #8 - /* Recombine even/odd bytes of multiplied destination */ - mov SCRATCH, SCRATCH, ror #8 - sel WK&dst, SCRATCH, WK&dst - /* Saturated add of source to multiplied destination */ - uqadd8 WK&dst, WK&dst, WK&src -.endm - -.macro over_8888_8888_process_tail cond, numbytes, firstreg - WK4 .req STRIDE_D - WK5 .req STRIDE_S - WK6 .req STRIDE_M - WK7 .req ORIG_W - over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg) - beq 10f - over_8888_8888_prepare %(4+firstreg) - .set PROCESS_REG, firstreg - .set PROCESS_OFF, -numbytes - .rept numbytes / 4 - over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG) - .set PROCESS_REG, PROCESS_REG+1 - .set PROCESS_OFF, PROCESS_OFF+4 - .endr - pixst , numbytes, firstreg, DST -10: - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ - 2, /* prefetch distance */ \ - over_8888_8888_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - over_8888_8888_process_head, \ - over_8888_8888_process_tail - -/******************************************************************************/ - -/* Multiply each byte of a word by a byte. - * Useful when there aren't any obvious ways to fill the stalls with other instructions. - * word Register containing 4 bytes - * byte Register containing byte multiplier (bits 8-31 must be 0) - * tmp Scratch register - * half Register containing the constant 0x00800080 - * GE[3:0] bits must contain 0101 - */ -.macro mul_8888_8 word, byte, tmp, half - /* Split even/odd bytes of word apart */ - uxtb16 tmp, word - uxtb16 word, word, ror #8 - /* Multiply bytes together with rounding, then by 257/256 */ - mla tmp, tmp, byte, half - mla word, word, byte, half /* 1 stall follows */ - uxtab16 tmp, tmp, tmp, ror #8 /* 1 stall follows */ - uxtab16 word, word, word, ror #8 - /* Recombine bytes */ - mov tmp, tmp, ror #8 - sel word, tmp, word -.endm - -/******************************************************************************/ - -.macro over_8888_n_8888_init - /* Mask is constant */ - ldr MASK, [sp, #ARGS_STACK_OFFSET+8] - /* Hold loop invariant in STRIDE_M */ - ldr STRIDE_M, =0x00800080 - /* We only want the alpha bits of the constant mask */ - mov MASK, MASK, lsr #24 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, STRIDE_M, STRIDE_M - line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W -.endm - -.macro over_8888_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - WK4 .req Y - WK5 .req STRIDE_D - WK6 .req STRIDE_S - WK7 .req ORIG_W - pixld , numbytes, %(4+(firstreg%2)), SRC, unaligned_src - pixld , numbytes, firstreg, DST, 0 - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -.macro over_8888_n_8888_1pixel src, dst - mul_8888_8 WK&src, MASK, SCRATCH, STRIDE_M - sub WK7, WK6, WK&src, lsr #24 - mul_8888_8 WK&dst, WK7, SCRATCH, STRIDE_M - uqadd8 WK&dst, WK&dst, WK&src -.endm - -.macro over_8888_n_8888_process_tail cond, numbytes, firstreg - WK4 .req Y - WK5 .req STRIDE_D - WK6 .req STRIDE_S - WK7 .req ORIG_W - over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg) - beq 10f - mov WK6, #255 - .set PROCESS_REG, firstreg - .rept numbytes / 4 - .if numbytes == 16 && PROCESS_REG == 2 - /* We're using WK6 and WK7 as temporaries, so half way through - * 4 pixels, reload the second two source pixels but this time - * into WK4 and WK5 */ - ldmdb SRC, {WK4, WK5} - .endif - over_8888_n_8888_1pixel %(4+(PROCESS_REG%2)), %(PROCESS_REG) - .set PROCESS_REG, PROCESS_REG+1 - .endr - pixst , numbytes, firstreg, DST -10: - .unreq WK4 - .unreq WK5 - .unreq WK6 - .unreq WK7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ - 2, /* prefetch distance */ \ - over_8888_n_8888_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - over_8888_n_8888_process_head, \ - over_8888_n_8888_process_tail - -/******************************************************************************/ - -.macro over_n_8_8888_init - /* Source is constant, but splitting it into even/odd bytes is a loop invariant */ - ldr SRC, [sp, #ARGS_STACK_OFFSET] - /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */ - ldr SCRATCH, =0x00800080 - uxtb16 STRIDE_S, SRC - uxtb16 SRC, SRC, ror #8 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, SCRATCH, SCRATCH - line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W -.endm - -.macro over_n_8_8888_newline - ldr STRIDE_D, =0x00800080 - b 1f - .ltorg -1: -.endm - -.macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - WK4 .req STRIDE_M - pixld , numbytes/4, 4, MASK, unaligned_mask - pixld , numbytes, firstreg, DST, 0 - .unreq WK4 -.endm - -.macro over_n_8_8888_1pixel src, dst - uxtb Y, WK4, ror #src*8 - /* Trailing part of multiplication of source */ - mla SCRATCH, STRIDE_S, Y, STRIDE_D - mla Y, SRC, Y, STRIDE_D - mov ORIG_W, #255 - uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 - uxtab16 Y, Y, Y, ror #8 - mov SCRATCH, SCRATCH, ror #8 - sub ORIG_W, ORIG_W, Y, lsr #24 - sel Y, SCRATCH, Y - /* Then multiply the destination */ - mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D - uqadd8 WK&dst, WK&dst, Y -.endm - -.macro over_n_8_8888_process_tail cond, numbytes, firstreg - WK4 .req STRIDE_M - teq WK4, #0 - beq 10f - .set PROCESS_REG, firstreg - .rept numbytes / 4 - over_n_8_8888_1pixel %(PROCESS_REG-firstreg), %(PROCESS_REG) - .set PROCESS_REG, PROCESS_REG+1 - .endr - pixst , numbytes, firstreg, DST -10: - .unreq WK4 -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ - 2, /* prefetch distance */ \ - over_n_8_8888_init, \ - over_n_8_8888_newline, \ - nop_macro, /* cleanup */ \ - over_n_8_8888_process_head, \ - over_n_8_8888_process_tail - -/******************************************************************************/ - -.macro over_reverse_n_8888_init - ldr SRC, [sp, #ARGS_STACK_OFFSET] - ldr MASK, =0x00800080 - /* Split source pixel into RB/AG parts */ - uxtb16 STRIDE_S, SRC - uxtb16 STRIDE_M, SRC, ror #8 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, MASK, MASK - line_saved_regs STRIDE_D, ORIG_W -.endm - -.macro over_reverse_n_8888_newline - mov STRIDE_D, #0xFF -.endm - -.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - pixld , numbytes, firstreg, DST, 0 -.endm - -.macro over_reverse_n_8888_1pixel d, is_only - teq WK&d, #0 - beq 8f /* replace with source */ - bics ORIG_W, STRIDE_D, WK&d, lsr #24 - .if is_only == 1 - beq 49f /* skip store */ - .else - beq 9f /* write same value back */ - .endif - mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */ - mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */ - uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 - uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8 - mov SCRATCH, SCRATCH, ror #8 - sel ORIG_W, SCRATCH, ORIG_W - uqadd8 WK&d, WK&d, ORIG_W - b 9f -8: mov WK&d, SRC -9: -.endm - -.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4 - .if numbytes == 4 - over_reverse_n_8888_1pixel reg1, 1 - .else - and SCRATCH, WK®1, WK®2 - .if numbytes == 16 - and SCRATCH, SCRATCH, WK®3 - and SCRATCH, SCRATCH, WK®4 - .endif - mvns SCRATCH, SCRATCH, asr #24 - beq 49f /* skip store if all opaque */ - over_reverse_n_8888_1pixel reg1, 0 - over_reverse_n_8888_1pixel reg2, 0 - .if numbytes == 16 - over_reverse_n_8888_1pixel reg3, 0 - over_reverse_n_8888_1pixel reg4, 0 - .endif - .endif - pixst , numbytes, reg1, DST -49: -.endm - -.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg - over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) -.endm - -generate_composite_function \ - pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ - 3, /* prefetch distance */ \ - over_reverse_n_8888_init, \ - over_reverse_n_8888_newline, \ - nop_macro, /* cleanup */ \ - over_reverse_n_8888_process_head, \ - over_reverse_n_8888_process_tail - -/******************************************************************************/ - -.macro over_white_8888_8888_ca_init - HALF .req SRC - TMP0 .req STRIDE_D - TMP1 .req STRIDE_S - TMP2 .req STRIDE_M - TMP3 .req ORIG_W - WK4 .req SCRATCH - line_saved_regs STRIDE_D, STRIDE_M, ORIG_W - ldr SCRATCH, =0x800080 - mov HALF, #0x80 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, SCRATCH, SCRATCH - .set DST_PRELOAD_BIAS, 8 -.endm - -.macro over_white_8888_8888_ca_cleanup - .set DST_PRELOAD_BIAS, 0 - .unreq HALF - .unreq TMP0 - .unreq TMP1 - .unreq TMP2 - .unreq TMP3 - .unreq WK4 -.endm - -.macro over_white_8888_8888_ca_combine m, d - uxtb16 TMP1, TMP0 /* rb_notmask */ - uxtb16 TMP2, d /* rb_dest; 1 stall follows */ - smlatt TMP3, TMP2, TMP1, HALF /* red */ - smlabb TMP2, TMP2, TMP1, HALF /* blue */ - uxtb16 TMP0, TMP0, ror #8 /* ag_notmask */ - uxtb16 TMP1, d, ror #8 /* ag_dest; 1 stall follows */ - smlatt d, TMP1, TMP0, HALF /* alpha */ - smlabb TMP1, TMP1, TMP0, HALF /* green */ - pkhbt TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */ - pkhbt TMP1, TMP1, d, lsl #16 /* ag */ - uxtab16 TMP0, TMP0, TMP0, ror #8 - uxtab16 TMP1, TMP1, TMP1, ror #8 - mov TMP0, TMP0, ror #8 - sel d, TMP0, TMP1 - uqadd8 d, d, m /* d is a late result */ -.endm - -.macro over_white_8888_8888_ca_1pixel_head - pixld , 4, 1, MASK, 0 - pixld , 4, 3, DST, 0 -.endm - -.macro over_white_8888_8888_ca_1pixel_tail - mvn TMP0, WK1 - teq WK1, WK1, asr #32 - bne 01f - bcc 03f - mov WK3, WK1 - b 02f -01: over_white_8888_8888_ca_combine WK1, WK3 -02: pixst , 4, 3, DST -03: -.endm - -.macro over_white_8888_8888_ca_2pixels_head - pixld , 8, 1, MASK, 0 -.endm - -.macro over_white_8888_8888_ca_2pixels_tail - pixld , 8, 3, DST - mvn TMP0, WK1 - teq WK1, WK1, asr #32 - bne 01f - movcs WK3, WK1 - bcs 02f - teq WK2, #0 - beq 05f - b 02f -01: over_white_8888_8888_ca_combine WK1, WK3 -02: mvn TMP0, WK2 - teq WK2, WK2, asr #32 - bne 03f - movcs WK4, WK2 - b 04f -03: over_white_8888_8888_ca_combine WK2, WK4 -04: pixst , 8, 3, DST -05: -.endm - -.macro over_white_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - .if numbytes == 4 - over_white_8888_8888_ca_1pixel_head - .else - .if numbytes == 16 - over_white_8888_8888_ca_2pixels_head - over_white_8888_8888_ca_2pixels_tail - .endif - over_white_8888_8888_ca_2pixels_head - .endif -.endm - -.macro over_white_8888_8888_ca_process_tail cond, numbytes, firstreg - .if numbytes == 4 - over_white_8888_8888_ca_1pixel_tail - .else - over_white_8888_8888_ca_2pixels_tail - .endif -.endm - -generate_composite_function \ - pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \ - 2, /* prefetch distance */ \ - over_white_8888_8888_ca_init, \ - nop_macro, /* newline */ \ - over_white_8888_8888_ca_cleanup, \ - over_white_8888_8888_ca_process_head, \ - over_white_8888_8888_ca_process_tail - - -.macro over_n_8888_8888_ca_init - /* Set up constants. RB_SRC and AG_SRC are in registers; - * RB_FLDS, A_SRC, and the two HALF values need to go on the - * stack (and the ful SRC value is already there) */ - ldr SCRATCH, [sp, #ARGS_STACK_OFFSET] - mov WK0, #0x00FF0000 - orr WK0, WK0, #0xFF /* RB_FLDS (0x00FF00FF) */ - mov WK1, #0x80 /* HALF default value */ - mov WK2, SCRATCH, lsr #24 /* A_SRC */ - orr WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */ - push {WK0-WK3} - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16 - uxtb16 SRC, SCRATCH - uxtb16 STRIDE_S, SCRATCH, ror #8 - - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, WK3, WK3 - - .unreq WK0 - .unreq WK1 - .unreq WK2 - .unreq WK3 - WK0 .req Y - WK1 .req STRIDE_D - RB_SRC .req SRC - AG_SRC .req STRIDE_S - WK2 .req STRIDE_M - RB_FLDS .req r8 /* the reloaded constants have to be at consecutive registers starting at an even one */ - A_SRC .req r8 - HALF .req r9 - WK3 .req r10 - WK4 .req r11 - WK5 .req SCRATCH - WK6 .req ORIG_W - - line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W -.endm - -.macro over_n_8888_8888_ca_cleanup - add sp, sp, #16 - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16 - - .unreq WK0 - .unreq WK1 - .unreq RB_SRC - .unreq AG_SRC - .unreq WK2 - .unreq RB_FLDS - .unreq A_SRC - .unreq HALF - .unreq WK3 - .unreq WK4 - .unreq WK5 - .unreq WK6 - WK0 .req r8 - WK1 .req r9 - WK2 .req r10 - WK3 .req r11 -.endm - -.macro over_n_8888_8888_ca_1pixel_head - pixld , 4, 6, MASK, 0 - pixld , 4, 0, DST, 0 -.endm - -.macro over_n_8888_8888_ca_1pixel_tail - ldrd A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8] - uxtb16 WK1, WK6 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */ - teq WK6, WK6, asr #32 /* Zc if transparent, ZC if opaque */ - bne 20f - bcc 40f - /* Mask is fully opaque (all channels) */ - ldr WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */ - eors A_SRC, A_SRC, #0xFF - bne 10f - /* Source is also opaque - same as src_8888_8888 */ - mov WK0, WK6 - b 30f -10: /* Same as over_8888_8888 */ - mul_8888_8 WK0, A_SRC, WK5, HALF - uqadd8 WK0, WK0, WK6 - b 30f -20: /* No simplifications possible - do it the hard way */ - uxtb16 WK2, WK6, ror #8 /* ag_mask */ - mla WK3, WK1, A_SRC, HALF /* rb_mul; 2 cycles */ - mla WK4, WK2, A_SRC, HALF /* ag_mul; 2 cycles */ - ldrd RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET] - uxtb16 WK5, WK0 /* rb_dest */ - uxtab16 WK3, WK3, WK3, ror #8 - uxtb16 WK6, WK0, ror #8 /* ag_dest */ - uxtab16 WK4, WK4, WK4, ror #8 - smlatt WK0, RB_SRC, WK1, HALF /* red1 */ - smlabb WK1, RB_SRC, WK1, HALF /* blue1 */ - bic WK3, RB_FLDS, WK3, lsr #8 - bic WK4, RB_FLDS, WK4, lsr #8 - pkhbt WK1, WK1, WK0, lsl #16 /* rb1 */ - smlatt WK0, WK5, WK3, HALF /* red2 */ - smlabb WK3, WK5, WK3, HALF /* blue2 */ - uxtab16 WK1, WK1, WK1, ror #8 - smlatt WK5, AG_SRC, WK2, HALF /* alpha1 */ - pkhbt WK3, WK3, WK0, lsl #16 /* rb2 */ - smlabb WK0, AG_SRC, WK2, HALF /* green1 */ - smlatt WK2, WK6, WK4, HALF /* alpha2 */ - smlabb WK4, WK6, WK4, HALF /* green2 */ - pkhbt WK0, WK0, WK5, lsl #16 /* ag1 */ - uxtab16 WK3, WK3, WK3, ror #8 - pkhbt WK4, WK4, WK2, lsl #16 /* ag2 */ - uxtab16 WK0, WK0, WK0, ror #8 - uxtab16 WK4, WK4, WK4, ror #8 - mov WK1, WK1, ror #8 - mov WK3, WK3, ror #8 - sel WK2, WK1, WK0 /* recombine source*mask */ - sel WK1, WK3, WK4 /* recombine dest*(1-source_alpha*mask) */ - uqadd8 WK0, WK1, WK2 /* followed by 1 stall */ -30: /* The destination buffer is already in the L1 cache, so - * there's little point in amalgamating writes */ - pixst , 4, 0, DST -40: -.endm - -.macro over_n_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - .rept (numbytes / 4) - 1 - over_n_8888_8888_ca_1pixel_head - over_n_8888_8888_ca_1pixel_tail - .endr - over_n_8888_8888_ca_1pixel_head -.endm - -.macro over_n_8888_8888_ca_process_tail cond, numbytes, firstreg - over_n_8888_8888_ca_1pixel_tail -.endm - -pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6 - ldr ip, [sp] - cmp ip, #-1 - beq pixman_composite_over_white_8888_8888_ca_asm_armv6 - /* else drop through... */ - .endfunc -generate_composite_function \ - pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \ - 2, /* prefetch distance */ \ - over_n_8888_8888_ca_init, \ - nop_macro, /* newline */ \ - over_n_8888_8888_ca_cleanup, \ - over_n_8888_8888_ca_process_head, \ - over_n_8888_8888_ca_process_tail - -/******************************************************************************/ - -.macro in_reverse_8888_8888_init - /* Hold loop invariant in MASK */ - ldr MASK, =0x00800080 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, MASK, MASK - /* Offset the source pointer: we only need the alpha bytes */ - add SRC, SRC, #3 - line_saved_regs ORIG_W -.endm - -.macro in_reverse_8888_8888_head numbytes, reg1, reg2, reg3 - ldrb ORIG_W, [SRC], #4 - .if numbytes >= 8 - ldrb WK®1, [SRC], #4 - .if numbytes == 16 - ldrb WK®2, [SRC], #4 - ldrb WK®3, [SRC], #4 - .endif - .endif - add DST, DST, #numbytes -.endm - -.macro in_reverse_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - in_reverse_8888_8888_head numbytes, firstreg, %(firstreg+1), %(firstreg+2) -.endm - -.macro in_reverse_8888_8888_1pixel s, d, offset, is_only - .if is_only != 1 - movs s, ORIG_W - .if offset != 0 - ldrb ORIG_W, [SRC, #offset] - .endif - beq 01f - teq STRIDE_M, #0xFF - beq 02f - .endif - uxtb16 SCRATCH, d /* rb_dest */ - uxtb16 d, d, ror #8 /* ag_dest */ - mla SCRATCH, SCRATCH, s, MASK - mla d, d, s, MASK - uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 - uxtab16 d, d, d, ror #8 - mov SCRATCH, SCRATCH, ror #8 - sel d, SCRATCH, d - b 02f - .if offset == 0 -48: /* Last mov d,#0 of the set - used as part of shortcut for - * source values all 0 */ - .endif -01: mov d, #0 -02: -.endm - -.macro in_reverse_8888_8888_tail numbytes, reg1, reg2, reg3, reg4 - .if numbytes == 4 - teq ORIG_W, ORIG_W, asr #32 - ldrne WK®1, [DST, #-4] - .elseif numbytes == 8 - teq ORIG_W, WK®1 - teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ - ldmnedb DST, {WK®1-WK®2} - .else - teq ORIG_W, WK®1 - teqeq ORIG_W, WK®2 - teqeq ORIG_W, WK®3 - teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ - ldmnedb DST, {WK®1-WK®4} - .endif - cmnne DST, #0 /* clear C if NE */ - bcs 49f /* no writes to dest if source all -1 */ - beq 48f /* set dest to all 0 if source all 0 */ - .if numbytes == 4 - in_reverse_8888_8888_1pixel ORIG_W, WK®1, 0, 1 - str WK®1, [DST, #-4] - .elseif numbytes == 8 - in_reverse_8888_8888_1pixel STRIDE_M, WK®1, -4, 0 - in_reverse_8888_8888_1pixel STRIDE_M, WK®2, 0, 0 - stmdb DST, {WK®1-WK®2} - .else - in_reverse_8888_8888_1pixel STRIDE_M, WK®1, -12, 0 - in_reverse_8888_8888_1pixel STRIDE_M, WK®2, -8, 0 - in_reverse_8888_8888_1pixel STRIDE_M, WK®3, -4, 0 - in_reverse_8888_8888_1pixel STRIDE_M, WK®4, 0, 0 - stmdb DST, {WK®1-WK®4} - .endif -49: -.endm - -.macro in_reverse_8888_8888_process_tail cond, numbytes, firstreg - in_reverse_8888_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) -.endm - -generate_composite_function \ - pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \ - 2, /* prefetch distance */ \ - in_reverse_8888_8888_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - in_reverse_8888_8888_process_head, \ - in_reverse_8888_8888_process_tail - -/******************************************************************************/ - -.macro over_n_8888_init - ldr SRC, [sp, #ARGS_STACK_OFFSET] - /* Hold loop invariant in MASK */ - ldr MASK, =0x00800080 - /* Hold multiplier for destination in STRIDE_M */ - mov STRIDE_M, #255 - sub STRIDE_M, STRIDE_M, SRC, lsr #24 - /* Set GE[3:0] to 0101 so SEL instructions do what we want */ - uadd8 SCRATCH, MASK, MASK -.endm - -.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload - pixld , numbytes, firstreg, DST, 0 -.endm - -.macro over_n_8888_1pixel dst - mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK - uqadd8 WK&dst, WK&dst, SRC -.endm - -.macro over_n_8888_process_tail cond, numbytes, firstreg - .set PROCESS_REG, firstreg - .rept numbytes / 4 - over_n_8888_1pixel %(PROCESS_REG) - .set PROCESS_REG, PROCESS_REG+1 - .endr - pixst , numbytes, firstreg, DST -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \ - FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \ - 2, /* prefetch distance */ \ - over_n_8888_init, \ - nop_macro, /* newline */ \ - nop_macro, /* cleanup */ \ - over_n_8888_process_head, \ - over_n_8888_process_tail - -/******************************************************************************/ diff --git a/vendor/pixman/pixman/pixman-arm-simd-asm.h b/vendor/pixman/pixman/pixman-arm-simd-asm.h deleted file mode 100644 index da153c3f5..000000000 --- a/vendor/pixman/pixman/pixman-arm-simd-asm.h +++ /dev/null @@ -1,966 +0,0 @@ -/* - * Copyright © 2012 Raspberry Pi Foundation - * Copyright © 2012 RISC OS Open Ltd - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of the copyright holders not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. The copyright holders make no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ben Avison (bavison@riscosopen.org) - * - */ - -/* - * Because the alignment of pixel data to cachelines, and even the number of - * cachelines per row can vary from row to row, and because of the need to - * preload each scanline once and only once, this prefetch strategy treats - * each row of pixels independently. When a pixel row is long enough, there - * are three distinct phases of prefetch: - * * an inner loop section, where each time a cacheline of data is - * processed, another cacheline is preloaded (the exact distance ahead is - * determined empirically using profiling results from lowlevel-blt-bench) - * * a leading section, where enough cachelines are preloaded to ensure no - * cachelines escape being preloaded when the inner loop starts - * * a trailing section, where a limited number (0 or more) of cachelines - * are preloaded to deal with data (if any) that hangs off the end of the - * last iteration of the inner loop, plus any trailing bytes that were not - * enough to make up one whole iteration of the inner loop - * - * There are (in general) three distinct code paths, selected between - * depending upon how long the pixel row is. If it is long enough that there - * is at least one iteration of the inner loop (as described above) then - * this is described as the "wide" case. If it is shorter than that, but - * there are still enough bytes output that there is at least one 16-byte- - * long, 16-byte-aligned write to the destination (the optimum type of - * write), then this is the "medium" case. If it is not even this long, then - * this is the "narrow" case, and there is no attempt to align writes to - * 16-byte boundaries. In the "medium" and "narrow" cases, all the - * cachelines containing data from the pixel row are prefetched up-front. - */ - -/* - * Determine whether we put the arguments on the stack for debugging. - */ -#undef DEBUG_PARAMS - -/* - * Bit flags for 'generate_composite_function' macro which are used - * to tune generated functions behavior. - */ -.set FLAG_DST_WRITEONLY, 0 -.set FLAG_DST_READWRITE, 1 -.set FLAG_COND_EXEC, 0 -.set FLAG_BRANCH_OVER, 2 -.set FLAG_PROCESS_PRESERVES_PSR, 0 -.set FLAG_PROCESS_CORRUPTS_PSR, 4 -.set FLAG_PROCESS_DOESNT_STORE, 0 -.set FLAG_PROCESS_DOES_STORE, 8 /* usually because it needs to conditionally skip it */ -.set FLAG_NO_SPILL_LINE_VARS, 0 -.set FLAG_SPILL_LINE_VARS_WIDE, 16 -.set FLAG_SPILL_LINE_VARS_NON_WIDE, 32 -.set FLAG_SPILL_LINE_VARS, 48 -.set FLAG_PROCESS_CORRUPTS_SCRATCH, 0 -.set FLAG_PROCESS_PRESERVES_SCRATCH, 64 -.set FLAG_PROCESS_PRESERVES_WK0, 0 -.set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */ -.set FLAG_PRELOAD_DST, 0 -.set FLAG_NO_PRELOAD_DST, 256 - -/* - * Number of bytes by which to adjust preload offset of destination - * buffer (allows preload instruction to be moved before the load(s)) - */ -.set DST_PRELOAD_BIAS, 0 - -/* - * Offset into stack where mask and source pointer/stride can be accessed. - */ -#ifdef DEBUG_PARAMS -.set ARGS_STACK_OFFSET, (9*4+9*4) -#else -.set ARGS_STACK_OFFSET, (9*4) -#endif - -/* - * Offset into stack where space allocated during init macro can be accessed. - */ -.set LOCALS_STACK_OFFSET, 0 - -/* - * Constants for selecting preferable prefetch type. - */ -.set PREFETCH_TYPE_NONE, 0 -.set PREFETCH_TYPE_STANDARD, 1 - -/* - * Definitions of macros for load/store of pixel data. - */ - -.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0 - .if numbytes == 16 - .if unaligned == 1 - op&r&cond WK®0, [base], #4 - op&r&cond WK®1, [base], #4 - op&r&cond WK®2, [base], #4 - op&r&cond WK®3, [base], #4 - .else - op&m&cond&ia base!, {WK®0,WK®1,WK®2,WK®3} - .endif - .elseif numbytes == 8 - .if unaligned == 1 - op&r&cond WK®0, [base], #4 - op&r&cond WK®1, [base], #4 - .else - op&m&cond&ia base!, {WK®0,WK®1} - .endif - .elseif numbytes == 4 - op&r&cond WK®0, [base], #4 - .elseif numbytes == 2 - op&r&cond&h WK®0, [base], #2 - .elseif numbytes == 1 - op&r&cond&b WK®0, [base], #1 - .else - .error "unsupported size: numbytes" - .endif -.endm - -.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base - .if numbytes == 16 - stm&cond&db base, {WK®0,WK®1,WK®2,WK®3} - .elseif numbytes == 8 - stm&cond&db base, {WK®0,WK®1} - .elseif numbytes == 4 - str&cond WK®0, [base, #-4] - .elseif numbytes == 2 - str&cond&h WK®0, [base, #-2] - .elseif numbytes == 1 - str&cond&b WK®0, [base, #-1] - .else - .error "unsupported size: numbytes" - .endif -.endm - -.macro pixld cond, numbytes, firstreg, base, unaligned - pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned -.endm - -.macro pixst cond, numbytes, firstreg, base - .if (flags) & FLAG_DST_READWRITE - pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base - .else - pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base - .endif -.endm - -.macro PF a, x:vararg - .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD) - a x - .endif -.endm - - -.macro preload_leading_step1 bpp, ptr, base -/* If the destination is already 16-byte aligned, then we need to preload - * between 0 and prefetch_distance (inclusive) cache lines ahead so there - * are no gaps when the inner loop starts. - */ - .if bpp > 0 - PF bic, ptr, base, #31 - .set OFFSET, 0 - .rept prefetch_distance+1 - PF pld, [ptr, #OFFSET] - .set OFFSET, OFFSET+32 - .endr - .endif -.endm - -.macro preload_leading_step2 bpp, bpp_shift, ptr, base -/* However, if the destination is not 16-byte aligned, we may need to - * preload more cache lines than that. The question we need to ask is: - * are the bytes corresponding to the leading pixels more than the amount - * by which the source pointer will be rounded down for preloading, and if - * so, by how many cache lines? Effectively, we want to calculate - * leading_bytes = ((-dst)&15)*src_bpp/dst_bpp - * inner_loop_offset = (src+leading_bytes)&31 - * extra_needed = leading_bytes - inner_loop_offset - * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only - * possible when there are 4 src bytes for every 1 dst byte). - */ - .if bpp > 0 - .ifc base,DST - /* The test can be simplified further when preloading the destination */ - PF tst, base, #16 - PF beq, 61f - .else - .if bpp/dst_w_bpp == 4 - PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift - PF and, SCRATCH, SCRATCH, #31 - PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift - PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ - PF movs, SCRATCH, SCRATCH, lsl #32-6 /* so this sets NC / nc / Nc */ - PF bcs, 61f - PF bpl, 60f - PF pld, [ptr, #32*(prefetch_distance+2)] - .else - PF mov, SCRATCH, base, lsl #32-5 - PF add, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift - PF rsbs, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift - PF bls, 61f - .endif - .endif -60: PF pld, [ptr, #32*(prefetch_distance+1)] -61: - .endif -.endm - -#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2)) -.macro preload_middle bpp, base, scratch_holds_offset - .if bpp > 0 - /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */ - .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp) - .if scratch_holds_offset - PF pld, [base, SCRATCH] - .else - PF bic, SCRATCH, base, #31 - PF pld, [SCRATCH, #32*prefetch_distance] - .endif - .endif - .endif -.endm - -.macro preload_trailing bpp, bpp_shift, base - .if bpp > 0 - .if bpp*pix_per_block > 256 - /* Calculations are more complex if more than one fetch per block */ - PF and, WK1, base, #31 - PF add, WK1, WK1, WK0, lsl #bpp_shift - PF add, WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1) - PF bic, SCRATCH, base, #31 -80: PF pld, [SCRATCH, #32*(prefetch_distance+1)] - PF add, SCRATCH, SCRATCH, #32 - PF subs, WK1, WK1, #32 - PF bhi, 80b - .else - /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */ - PF mov, SCRATCH, base, lsl #32-5 - PF adds, SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift - PF adceqs, SCRATCH, SCRATCH, #0 - /* The instruction above has two effects: ensures Z is only - * set if C was clear (so Z indicates that both shifted quantities - * were 0), and clears C if Z was set (so C indicates that the sum - * of the shifted quantities was greater and not equal to 32) */ - PF beq, 82f - PF bic, SCRATCH, base, #31 - PF bcc, 81f - PF pld, [SCRATCH, #32*(prefetch_distance+2)] -81: PF pld, [SCRATCH, #32*(prefetch_distance+1)] -82: - .endif - .endif -.endm - - -.macro preload_line narrow_case, bpp, bpp_shift, base -/* "narrow_case" - just means that the macro was invoked from the "narrow" - * code path rather than the "medium" one - because in the narrow case, - * the row of pixels is known to output no more than 30 bytes, then - * (assuming the source pixels are no wider than the the destination - * pixels) they cannot possibly straddle more than 2 32-byte cachelines, - * meaning there's no need for a loop. - * "bpp" - number of bits per pixel in the channel (source, mask or - * destination) that's being preloaded, or 0 if this channel is not used - * for reading - * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course) - * "base" - base address register of channel to preload (SRC, MASK or DST) - */ - .if bpp > 0 - .if narrow_case && (bpp <= dst_w_bpp) - /* In these cases, each line for each channel is in either 1 or 2 cache lines */ - PF bic, WK0, base, #31 - PF pld, [WK0] - PF add, WK1, base, X, LSL #bpp_shift - PF sub, WK1, WK1, #1 - PF bic, WK1, WK1, #31 - PF cmp, WK1, WK0 - PF beq, 90f - PF pld, [WK1] -90: - .else - PF bic, WK0, base, #31 - PF pld, [WK0] - PF add, WK1, base, X, lsl #bpp_shift - PF sub, WK1, WK1, #1 - PF bic, WK1, WK1, #31 - PF cmp, WK1, WK0 - PF beq, 92f -91: PF add, WK0, WK0, #32 - PF cmp, WK0, WK1 - PF pld, [WK0] - PF bne, 91b -92: - .endif - .endif -.endm - - -.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx - process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 - .if decrementx - sub&cond X, X, #8*numbytes/dst_w_bpp - .endif - process_tail cond, numbytes, firstreg - .if !((flags) & FLAG_PROCESS_DOES_STORE) - pixst cond, numbytes, firstreg, DST - .endif -.endm - -.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx - .if (flags) & FLAG_BRANCH_OVER - .ifc cond,mi - bpl 100f - .endif - .ifc cond,cs - bcc 100f - .endif - .ifc cond,ne - beq 100f - .endif - conditional_process1_helper , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx -100: - .else - conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx - .endif -.endm - -.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx - .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE) - /* Can't interleave reads and writes */ - test - conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx - .if (flags) & FLAG_PROCESS_CORRUPTS_PSR - test - .endif - conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx - .else - /* Can interleave reads and writes for better scheduling */ - test - process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 - process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 - .if decrementx - sub&cond1 X, X, #8*numbytes1/dst_w_bpp - sub&cond2 X, X, #8*numbytes2/dst_w_bpp - .endif - process_tail cond1, numbytes1, firstreg1 - process_tail cond2, numbytes2, firstreg2 - pixst cond1, numbytes1, firstreg1, DST - pixst cond2, numbytes2, firstreg2, DST - .endif -.endm - - -.macro test_bits_1_0_ptr - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - movs SCRATCH, X, lsl #32-1 /* C,N = bits 1,0 of DST */ - .else - movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */ - .endif -.endm - -.macro test_bits_3_2_ptr - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - movs SCRATCH, X, lsl #32-3 /* C,N = bits 3, 2 of DST */ - .else - movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */ - .endif -.endm - -.macro leading_15bytes process_head, process_tail - /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */ - .set DECREMENT_X, 1 - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - .set DECREMENT_X, 0 - sub X, X, WK0, lsr #dst_bpp_shift - str X, [sp, #LINE_SAVED_REG_COUNT*4] - mov X, WK0 - .endif - /* Use unaligned loads in all cases for simplicity */ - .if dst_w_bpp == 8 - conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X - .elseif dst_w_bpp == 16 - test_bits_1_0_ptr - conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X - .endif - conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - ldr X, [sp, #LINE_SAVED_REG_COUNT*4] - .endif -.endm - -.macro test_bits_3_2_pix - movs SCRATCH, X, lsl #dst_bpp_shift+32-3 -.endm - -.macro test_bits_1_0_pix - .if dst_w_bpp == 8 - movs SCRATCH, X, lsl #dst_bpp_shift+32-1 - .else - movs SCRATCH, X, lsr #1 - .endif -.endm - -.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask - conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 - .if dst_w_bpp == 16 - test_bits_1_0_pix - conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 - .elseif dst_w_bpp == 8 - conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 - .endif -.endm - - -.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment -110: - .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */ - .rept pix_per_block*dst_w_bpp/128 - process_head , 16, 0, unaligned_src, unaligned_mask, 1 - .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) - preload_middle src_bpp, SRC, 1 - .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) - preload_middle mask_bpp, MASK, 1 - .else - preload_middle src_bpp, SRC, 0 - preload_middle mask_bpp, MASK, 0 - .endif - .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0) - /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that - * destination prefetches are 32-byte aligned. It's also the easiest channel to offset - * preloads for, to achieve staggered prefetches for multiple channels, because there are - * always two STMs per prefetch, so there is always an opposite STM on which to put the - * preload. Note, no need to BIC the base register here */ - PF pld, [DST, #32*prefetch_distance - dst_alignment] - .endif - process_tail , 16, 0 - .if !((flags) & FLAG_PROCESS_DOES_STORE) - pixst , 16, 0, DST - .endif - .set SUBBLOCK, SUBBLOCK+1 - .endr - subs X, X, #pix_per_block - bhs 110b -.endm - -.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask - /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */ - .if dst_r_bpp > 0 - tst DST, #16 - bne 111f - process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS - b 112f -111: - .endif - process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS -112: - /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */ - .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256) - PF and, WK0, X, #pix_per_block-1 - .endif - preload_trailing src_bpp, src_bpp_shift, SRC - preload_trailing mask_bpp, mask_bpp_shift, MASK - .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 - preload_trailing dst_r_bpp, dst_bpp_shift, DST - .endif - add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp - /* The remainder of the line is handled identically to the medium case */ - medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask -.endm - -.macro medium_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask -120: - process_head , 16, 0, unaligned_src, unaligned_mask, 0 - process_tail , 16, 0 - .if !((flags) & FLAG_PROCESS_DOES_STORE) - pixst , 16, 0, DST - .endif - subs X, X, #128/dst_w_bpp - bhs 120b - /* Trailing pixels */ - tst X, #128/dst_w_bpp - 1 - beq exit_label - trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask -.endm - -.macro narrow_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask - tst X, #16*8/dst_w_bpp - conditional_process1 ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0 - /* Trailing pixels */ - /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */ - trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask -.endm - -.macro switch_on_alignment action, process_head, process_tail, process_inner_loop, exit_label - /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */ - .if mask_bpp == 8 || mask_bpp == 16 - tst MASK, #3 - bne 141f - .endif - .if src_bpp == 8 || src_bpp == 16 - tst SRC, #3 - bne 140f - .endif - action process_head, process_tail, process_inner_loop, exit_label, 0, 0 - .if src_bpp == 8 || src_bpp == 16 - b exit_label -140: - action process_head, process_tail, process_inner_loop, exit_label, 1, 0 - .endif - .if mask_bpp == 8 || mask_bpp == 16 - b exit_label -141: - .if src_bpp == 8 || src_bpp == 16 - tst SRC, #3 - bne 142f - .endif - action process_head, process_tail, process_inner_loop, exit_label, 0, 1 - .if src_bpp == 8 || src_bpp == 16 - b exit_label -142: - action process_head, process_tail, process_inner_loop, exit_label, 1, 1 - .endif - .endif -.endm - - -.macro end_of_line restore_x, vars_spilled, loop_label, last_one - .if vars_spilled - /* Sadly, GAS doesn't seem have an equivalent of the DCI directive? */ - /* This is ldmia sp,{} */ - .word 0xE89D0000 | LINE_SAVED_REGS - .endif - subs Y, Y, #1 - .if vars_spilled - .if (LINE_SAVED_REGS) & (1<<1) - str Y, [sp] - .endif - .endif - add DST, DST, STRIDE_D - .if src_bpp > 0 - add SRC, SRC, STRIDE_S - .endif - .if mask_bpp > 0 - add MASK, MASK, STRIDE_M - .endif - .if restore_x - mov X, ORIG_W - .endif - bhs loop_label - .ifc "last_one","" - .if vars_spilled - b 197f - .else - b 198f - .endif - .else - .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS) - b 198f - .endif - .endif -.endm - - -.macro generate_composite_function fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags_, \ - prefetch_distance_, \ - init, \ - newline, \ - cleanup, \ - process_head, \ - process_tail, \ - process_inner_loop - - pixman_asm_function fname - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set flags, flags_ - .set prefetch_distance, prefetch_distance_ - -/* - * Select prefetch type for this function. - */ - .if prefetch_distance == 0 - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE - .else - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD - .endif - - .if src_bpp == 32 - .set src_bpp_shift, 2 - .elseif src_bpp == 24 - .set src_bpp_shift, 0 - .elseif src_bpp == 16 - .set src_bpp_shift, 1 - .elseif src_bpp == 8 - .set src_bpp_shift, 0 - .elseif src_bpp == 0 - .set src_bpp_shift, -1 - .else - .error "requested src bpp (src_bpp) is not supported" - .endif - - .if mask_bpp == 32 - .set mask_bpp_shift, 2 - .elseif mask_bpp == 24 - .set mask_bpp_shift, 0 - .elseif mask_bpp == 8 - .set mask_bpp_shift, 0 - .elseif mask_bpp == 0 - .set mask_bpp_shift, -1 - .else - .error "requested mask bpp (mask_bpp) is not supported" - .endif - - .if dst_w_bpp == 32 - .set dst_bpp_shift, 2 - .elseif dst_w_bpp == 24 - .set dst_bpp_shift, 0 - .elseif dst_w_bpp == 16 - .set dst_bpp_shift, 1 - .elseif dst_w_bpp == 8 - .set dst_bpp_shift, 0 - .else - .error "requested dst bpp (dst_w_bpp) is not supported" - .endif - - .if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp - .else - .set dst_r_bpp, 0 - .endif - - .set pix_per_block, 16*8/dst_w_bpp - .if src_bpp != 0 - .if 32*8/src_bpp > pix_per_block - .set pix_per_block, 32*8/src_bpp - .endif - .endif - .if mask_bpp != 0 - .if 32*8/mask_bpp > pix_per_block - .set pix_per_block, 32*8/mask_bpp - .endif - .endif - .if dst_r_bpp != 0 - .if 32*8/dst_r_bpp > pix_per_block - .set pix_per_block, 32*8/dst_r_bpp - .endif - .endif - -/* The standard entry conditions set up by pixman-arm-common.h are: - * r0 = width (pixels) - * r1 = height (rows) - * r2 = pointer to top-left pixel of destination - * r3 = destination stride (pixels) - * [sp] = source pixel value, or pointer to top-left pixel of source - * [sp,#4] = 0 or source stride (pixels) - * The following arguments are unused for non-mask operations - * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask - * [sp,#12] = 0 or mask stride (pixels) - */ - -/* - * Assign symbolic names to registers - */ - X .req r0 /* pixels to go on this line */ - Y .req r1 /* lines to go */ - DST .req r2 /* destination pixel pointer */ - STRIDE_D .req r3 /* destination stride (bytes, minus width) */ - SRC .req r4 /* source pixel pointer */ - STRIDE_S .req r5 /* source stride (bytes, minus width) */ - MASK .req r6 /* mask pixel pointer (if applicable) */ - STRIDE_M .req r7 /* mask stride (bytes, minus width) */ - WK0 .req r8 /* pixel data registers */ - WK1 .req r9 - WK2 .req r10 - WK3 .req r11 - SCRATCH .req r12 - ORIG_W .req r14 /* width (pixels) */ - - push {r4-r11, lr} /* save all registers */ - - subs Y, Y, #1 - blo 199f - -#ifdef DEBUG_PARAMS - sub sp, sp, #9*4 -#endif - - .if src_bpp > 0 - ldr SRC, [sp, #ARGS_STACK_OFFSET] - ldr STRIDE_S, [sp, #ARGS_STACK_OFFSET+4] - .endif - .if mask_bpp > 0 - ldr MASK, [sp, #ARGS_STACK_OFFSET+8] - ldr STRIDE_M, [sp, #ARGS_STACK_OFFSET+12] - .endif - -#ifdef DEBUG_PARAMS - add Y, Y, #1 - stmia sp, {r0-r7,pc} - sub Y, Y, #1 -#endif - - init - - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - /* Reserve a word in which to store X during leading pixels */ - sub sp, sp, #4 - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4 - .endif - - lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */ - sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift - .if src_bpp > 0 - lsl STRIDE_S, #src_bpp_shift - sub STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift - .endif - .if mask_bpp > 0 - lsl STRIDE_M, #mask_bpp_shift - sub STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift - .endif - - /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */ - cmp X, #2*16*8/dst_w_bpp - 1 - blo 170f - .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */ - /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */ - cmp X, #(prefetch_distance+3)*pix_per_block - 1 - blo 160f - - /* Wide case */ - /* Adjust X so that the decrement instruction can also test for - * inner loop termination. We want it to stop when there are - * (prefetch_distance+1) complete blocks to go. */ - sub X, X, #(prefetch_distance+2)*pix_per_block - mov ORIG_W, X - .if (flags) & FLAG_SPILL_LINE_VARS_WIDE - /* This is stmdb sp!,{} */ - .word 0xE92D0000 | LINE_SAVED_REGS - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 - .endif -151: /* New line */ - newline - preload_leading_step1 src_bpp, WK1, SRC - preload_leading_step1 mask_bpp, WK2, MASK - .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 - preload_leading_step1 dst_r_bpp, WK3, DST - .endif - - ands WK0, DST, #15 - beq 154f - rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */ - - preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC - preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK - .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 - preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST - .endif - - leading_15bytes process_head, process_tail - -154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */ - .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) - and SCRATCH, SRC, #31 - rsb SCRATCH, SCRATCH, #32*prefetch_distance - .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) - and SCRATCH, MASK, #31 - rsb SCRATCH, SCRATCH, #32*prefetch_distance - .endif - .ifc "process_inner_loop","" - switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f - .else - switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f - .endif - -157: /* Check for another line */ - end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b - .if (flags) & FLAG_SPILL_LINE_VARS_WIDE - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 - .endif - .endif - - .ltorg - -160: /* Medium case */ - mov ORIG_W, X - .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE - /* This is stmdb sp!,{} */ - .word 0xE92D0000 | LINE_SAVED_REGS - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 - .endif -161: /* New line */ - newline - preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ - preload_line 0, mask_bpp, mask_bpp_shift, MASK - .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 - preload_line 0, dst_r_bpp, dst_bpp_shift, DST - .endif - - sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */ - ands WK0, DST, #15 - beq 164f - rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */ - - leading_15bytes process_head, process_tail - -164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */ - switch_on_alignment medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f - -167: /* Check for another line */ - end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b - - .ltorg - -170: /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */ - .if dst_w_bpp < 32 - mov ORIG_W, X - .endif - .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE - /* This is stmdb sp!,{} */ - .word 0xE92D0000 | LINE_SAVED_REGS - .endif -171: /* New line */ - newline - preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ - preload_line 1, mask_bpp, mask_bpp_shift, MASK - .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 - preload_line 1, dst_r_bpp, dst_bpp_shift, DST - .endif - - .if dst_w_bpp == 8 - tst DST, #3 - beq 174f -172: subs X, X, #1 - blo 177f - process_head , 1, 0, 1, 1, 0 - process_tail , 1, 0 - .if !((flags) & FLAG_PROCESS_DOES_STORE) - pixst , 1, 0, DST - .endif - tst DST, #3 - bne 172b - .elseif dst_w_bpp == 16 - tst DST, #2 - beq 174f - subs X, X, #1 - blo 177f - process_head , 2, 0, 1, 1, 0 - process_tail , 2, 0 - .if !((flags) & FLAG_PROCESS_DOES_STORE) - pixst , 2, 0, DST - .endif - .endif - -174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */ - switch_on_alignment narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f - -177: /* Check for another line */ - end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one - .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 - .endif - -197: - .if (flags) & FLAG_SPILL_LINE_VARS - add sp, sp, #LINE_SAVED_REG_COUNT*4 - .endif -198: - .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 - .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4 - .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4 - add sp, sp, #4 - .endif - - cleanup - -#ifdef DEBUG_PARAMS - add sp, sp, #9*4 /* junk the debug copy of arguments */ -#endif -199: - pop {r4-r11, pc} /* exit */ - - .ltorg - - .unreq X - .unreq Y - .unreq DST - .unreq STRIDE_D - .unreq SRC - .unreq STRIDE_S - .unreq MASK - .unreq STRIDE_M - .unreq WK0 - .unreq WK1 - .unreq WK2 - .unreq WK3 - .unreq SCRATCH - .unreq ORIG_W - .endfunc -.endm - -.macro line_saved_regs x:vararg - .set LINE_SAVED_REGS, 0 - .set LINE_SAVED_REG_COUNT, 0 - .irp SAVED_REG,x - .ifc "SAVED_REG","Y" - .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1) - .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 - .endif - .ifc "SAVED_REG","STRIDE_D" - .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3) - .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 - .endif - .ifc "SAVED_REG","STRIDE_S" - .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5) - .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 - .endif - .ifc "SAVED_REG","STRIDE_M" - .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7) - .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 - .endif - .ifc "SAVED_REG","ORIG_W" - .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14) - .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 - .endif - .endr -.endm - -.macro nop_macro x:vararg -.endm diff --git a/vendor/pixman/pixman/pixman-arm-simd.c b/vendor/pixman/pixman/pixman-arm-simd.c deleted file mode 100644 index 40f3a9759..000000000 --- a/vendor/pixman/pixman/pixman-arm-simd.c +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" -#include "pixman-arm-common.h" -#include "pixman-inlines.h" - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888, - uint16_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565, - uint32_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888, - uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, - uint16_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, - uint32_t, uint32_t) - -void -pixman_composite_src_n_8888_asm_armv6 (int32_t w, - int32_t h, - uint32_t *dst, - int32_t dst_stride, - uint32_t src); - -void -pixman_composite_src_n_0565_asm_armv6 (int32_t w, - int32_t h, - uint16_t *dst, - int32_t dst_stride, - uint16_t src); - -void -pixman_composite_src_n_8_asm_armv6 (int32_t w, - int32_t h, - uint8_t *dst, - int32_t dst_stride, - uint8_t src); - -static pixman_bool_t -arm_simd_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, /* in 32-bit words */ - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor) -{ - /* stride is always multiple of 32bit units in pixman */ - uint32_t byte_stride = stride * sizeof(uint32_t); - - switch (bpp) - { - case 8: - pixman_composite_src_n_8_asm_armv6 ( - width, - height, - (uint8_t *)(((char *) bits) + y * byte_stride + x), - byte_stride, - _xor & 0xff); - return TRUE; - case 16: - pixman_composite_src_n_0565_asm_armv6 ( - width, - height, - (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), - byte_stride / 2, - _xor & 0xffff); - return TRUE; - case 32: - pixman_composite_src_n_8888_asm_armv6 ( - width, - height, - (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), - byte_stride / 4, - _xor); - return TRUE; - default: - return FALSE; - } -} - -static pixman_bool_t -arm_simd_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, /* in 32-bit words */ - int dst_stride, /* in 32-bit words */ - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - if (src_bpp != dst_bpp) - return FALSE; - - switch (src_bpp) - { - case 8: - pixman_composite_src_8_8_asm_armv6 ( - width, height, - (uint8_t *)(((char *) dst_bits) + - dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4, - (uint8_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 1), src_stride * 4); - return TRUE; - case 16: - pixman_composite_src_0565_0565_asm_armv6 ( - width, height, - (uint16_t *)(((char *) dst_bits) + - dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, - (uint16_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 2), src_stride * 2); - return TRUE; - case 32: - pixman_composite_src_8888_8888_asm_armv6 ( - width, height, - (uint32_t *)(((char *) dst_bits) + - dest_y * dst_stride * 4 + dest_x * 4), dst_stride, - (uint32_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 4), src_stride); - return TRUE; - default: - return FALSE; - } -} - -static const pixman_fast_path_t arm_simd_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), - - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, armv6_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, armv6_composite_src_x888_8888), - - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), - - PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8), - PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8), - - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, armv6_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, armv6_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888), - - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), - - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), - - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888), - - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), - - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), - - PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888), - PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888), - PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888), - PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888), - - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca), - - SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), - SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), - - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), - - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); - - imp->blt = arm_simd_blt; - imp->fill = arm_simd_fill; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-arm.c b/vendor/pixman/pixman/pixman-arm.c deleted file mode 100644 index 288172b62..000000000 --- a/vendor/pixman/pixman/pixman-arm.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -typedef enum -{ - ARM_V7 = (1 << 0), - ARM_V6 = (1 << 1), - ARM_VFP = (1 << 2), - ARM_NEON = (1 << 3), - ARM_IWMMXT = (1 << 4) -} arm_cpu_features_t; - -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) - -#if defined(_MSC_VER) - -/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ -#include - -extern int pixman_msvc_try_arm_neon_op (); -extern int pixman_msvc_try_arm_simd_op (); - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - - __try - { - pixman_msvc_try_arm_simd_op (); - features |= ARM_V6; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - } - - __try - { - pixman_msvc_try_arm_neon_op (); - features |= ARM_NEON; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - } - - return features; -} - -#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) /* iOS */ - -#include "TargetConditionals.h" - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - - features |= ARM_V6; - - /* Detection of ARM NEON on iOS is fairly simple because iOS binaries - * contain separate executable images for each processor architecture. - * So all we have to do is detect the armv7 architecture build. The - * operating system automatically runs the armv7 binary for armv7 devices - * and the armv6 binary for armv6 devices. - */ -#if defined(__ARM_NEON__) - features |= ARM_NEON; -#endif - - return features; -} - -#elif defined(__ANDROID__) || defined(ANDROID) /* Android */ - -#include - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - AndroidCpuFamily cpu_family; - uint64_t cpu_features; - - cpu_family = android_getCpuFamily(); - cpu_features = android_getCpuFeatures(); - - if (cpu_family == ANDROID_CPU_FAMILY_ARM) - { - if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7) - features |= ARM_V7; - - if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3) - features |= ARM_VFP; - - if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) - features |= ARM_NEON; - } - - return features; -} - -#elif defined (__linux__) /* linux ELF */ - -#include -#include -#include -#include -#include -#include -#include - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - Elf32_auxv_t aux; - int fd; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) - { - if (aux.a_type == AT_HWCAP) - { - uint32_t hwcap = aux.a_un.a_val; - - /* hardcode these values to avoid depending on specific - * versions of the hwcap header, e.g. HWCAP_NEON - */ - if ((hwcap & 64) != 0) - features |= ARM_VFP; - if ((hwcap & 512) != 0) - features |= ARM_IWMMXT; - /* this flag is only present on kernel 2.6.29 */ - if ((hwcap & 4096) != 0) - features |= ARM_NEON; - } - else if (aux.a_type == AT_PLATFORM) - { - const char *plat = (const char*) aux.a_un.a_val; - - if (strncmp (plat, "v7l", 3) == 0) - features |= (ARM_V7 | ARM_V6); - else if (strncmp (plat, "v6l", 3) == 0) - features |= ARM_V6; - } - } - close (fd); - } - - return features; -} - -#elif defined (_3DS) /* 3DS homebrew (devkitARM) */ - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - - features |= ARM_V6; - - return features; -} - -#elif defined (PSP2) || defined (__SWITCH__) -/* Vita (VitaSDK) or Switch (devkitA64) homebrew */ - -static arm_cpu_features_t -detect_cpu_features (void) -{ - arm_cpu_features_t features = 0; - - features |= ARM_NEON; - - return features; -} - -#else /* Unknown */ - -static arm_cpu_features_t -detect_cpu_features (void) -{ - return 0; -} - -#endif /* Linux elf */ - -static pixman_bool_t -have_feature (arm_cpu_features_t feature) -{ - static pixman_bool_t initialized; - static arm_cpu_features_t features; - - if (!initialized) - { - features = detect_cpu_features(); - initialized = TRUE; - } - - return (features & feature) == feature; -} - -#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ - -pixman_implementation_t * -_pixman_arm_get_implementations (pixman_implementation_t *imp) -{ -#ifdef USE_ARM_SIMD - if (!_pixman_disabled ("arm-simd") && have_feature (ARM_V6)) - imp = _pixman_implementation_create_arm_simd (imp); -#endif - -#ifdef USE_ARM_IWMMXT - if (!_pixman_disabled ("arm-iwmmxt") && have_feature (ARM_IWMMXT)) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_ARM_NEON - if (!_pixman_disabled ("arm-neon") && have_feature (ARM_NEON)) - imp = _pixman_implementation_create_arm_neon (imp); -#endif - -#ifdef USE_ARM_A64_NEON - /* neon is a part of aarch64 */ - if (!_pixman_disabled ("arm-neon")) - imp = _pixman_implementation_create_arm_neon (imp); -#endif - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S b/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S deleted file mode 100644 index 31d103d1d..000000000 --- a/vendor/pixman/pixman/pixman-arma64-neon-asm-bilinear.S +++ /dev/null @@ -1,1275 +0,0 @@ -/* - * Copyright © 2011 SCore Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - * Author: Taekyun Kim (tkq.kim@samsung.com) - */ - -/* - * This file contains scaled bilinear scanline functions implemented - * using older siarhei's bilinear macro template. - * - * << General scanline function procedures >> - * 1. bilinear interpolate source pixels - * 2. load mask pixels - * 3. load destination pixels - * 4. duplicate mask to fill whole register - * 5. interleave source & destination pixels - * 6. apply mask to source pixels - * 7. combine source & destination pixels - * 8, Deinterleave final result - * 9. store destination pixels - * - * All registers with single number (i.e. src0, tmp0) are 64-bits registers. - * Registers with double numbers(src01, dst01) are 128-bits registers. - * All temp registers can be used freely outside the code block. - * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. - * - * Remarks - * There can be lots of pipeline stalls inside code block and between code blocks. - * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined (__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - -.text -.arch armv8-a -.altmacro -.p2align 2 - -#include "pixman-private.h" -#include "pixman-arm-asm.h" -#include "pixman-arma64-neon-asm.h" - -/* - * Bilinear macros from pixman-arm-neon-asm.S - */ - -/* - * Bilinear scaling support code which tries to provide pixel fetching, color - * format conversion, and interpolation as separate macros which can be used - * as the basic building blocks for constructing bilinear scanline functions. - */ - -.macro bilinear_load_8888 reg1, reg2, tmp - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - ld1 {®1&.2s}, [TMP1], STRIDE - ld1 {®2&.2s}, [TMP1] -.endm - -.macro bilinear_load_0565 reg1, reg2, tmp - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - ld1 {®2&.s}[0], [TMP1], STRIDE - ld1 {®2&.s}[1], [TMP1] - convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp -.endm - -.macro bilinear_load_and_vertical_interpolate_two_8888 \ - acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 - - bilinear_load_8888 reg1, reg2, tmp1 - umull &acc1&.8h, ®1&.8b, v28.8b - umlal &acc1&.8h, ®2&.8b, v29.8b - bilinear_load_8888 reg3, reg4, tmp2 - umull &acc2&.8h, ®3&.8b, v28.8b - umlal &acc2&.8h, ®4&.8b, v29.8b -.endm - -.macro bilinear_load_and_vertical_interpolate_four_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - bilinear_load_and_vertical_interpolate_two_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi - bilinear_load_and_vertical_interpolate_two_8888 \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi -.endm - -.macro vzip reg1, reg2 - zip1 v24.8b, reg1, reg2 - zip2 reg2, reg1, reg2 - mov reg1, v24.8b -.endm - -.macro vuzp reg1, reg2 - uzp1 v24.8b, reg1, reg2 - uzp2 reg2, reg1, reg2 - mov reg1, v24.8b -.endm - -.macro bilinear_load_and_vertical_interpolate_two_0565 \ - acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr WTMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&acc2&.s}[0], [TMP1], STRIDE - ld1 {&acc2&.s}[2], [TMP2], STRIDE - ld1 {&acc2&.s}[1], [TMP1] - ld1 {&acc2&.s}[3], [TMP2] - convert_0565_to_x888 acc2, reg3, reg2, reg1 - vzip ®1&.8b, ®3&.8b - vzip ®2&.8b, ®4&.8b - vzip ®3&.8b, ®4&.8b - vzip ®1&.8b, ®2&.8b - umull &acc1&.8h, ®1&.8b, v28.8b - umlal &acc1&.8h, ®2&.8b, v29.8b - umull &acc2&.8h, ®3&.8b, v28.8b - umlal &acc2&.8h, ®4&.8b, v29.8b -.endm - -.macro bilinear_load_and_vertical_interpolate_four_0565 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr WTMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&xacc2&.s}[0], [TMP1], STRIDE - ld1 {&xacc2&.s}[2], [TMP2], STRIDE - ld1 {&xacc2&.s}[1], [TMP1] - ld1 {&xacc2&.s}[3], [TMP2] - convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr WTMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&yacc2&.s}[0], [TMP1], STRIDE - vzip &xreg1&.8b, &xreg3&.8b - ld1 {&yacc2&.s}[2], [TMP2], STRIDE - vzip &xreg2&.8b, &xreg4&.8b - ld1 {&yacc2&.s}[1], [TMP1] - vzip &xreg3&.8b, &xreg4&.8b - ld1 {&yacc2&.s}[3], [TMP2] - vzip &xreg1&.8b, &xreg2&.8b - convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 - umull &xacc1&.8h, &xreg1&.8b, v28.8b - vzip &yreg1&.8b, &yreg3&.8b - umlal &xacc1&.8h, &xreg2&.8b, v29.8b - vzip &yreg2&.8b, &yreg4&.8b - umull &xacc2&.8h, &xreg3&.8b, v28.8b - vzip &yreg3&.8b, &yreg4&.8b - umlal &xacc2&.8h, &xreg4&.8b, v29.8b - vzip &yreg1&.8b, &yreg2&.8b - umull &yacc1&.8h, &yreg1&.8b, v28.8b - umlal &yacc1&.8h, &yreg2&.8b, v29.8b - umull &yacc2&.8h, &yreg3&.8b, v28.8b - umlal &yacc2&.8h, &yreg4&.8b, v29.8b -.endm - -.macro bilinear_store_8888 numpix, tmp1, tmp2 -.if numpix == 4 - st1 {v0.2s, v1.2s}, [OUT], #16 -.elseif numpix == 2 - st1 {v0.2s}, [OUT], #8 -.elseif numpix == 1 - st1 {v0.s}[0], [OUT], #4 -.else - .error bilinear_store_8888 numpix is unsupported -.endif -.endm - -.macro bilinear_store_0565 numpix, tmp1, tmp2 - vuzp v0.8b, v1.8b - vuzp v2.8b, v3.8b - vuzp v1.8b, v3.8b - vuzp v0.8b, v2.8b - convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2 -.if numpix == 4 - st1 {v1.4h}, [OUT], #8 -.elseif numpix == 2 - st1 {v1.s}[0], [OUT], #4 -.elseif numpix == 1 - st1 {v1.h}[0], [OUT], #2 -.else - .error bilinear_store_0565 numpix is unsupported -.endif -.endm - - -/* - * Macros for loading mask pixels into register 'mask'. - * dup must be done in somewhere else. - */ -.macro bilinear_load_mask_x numpix, mask -.endm - -.macro bilinear_load_mask_8 numpix, mask -.if numpix == 4 - ld1 {&mask&.s}[0], [MASK], #4 -.elseif numpix == 2 - ld1 {&mask&.h}[0], [MASK], #2 -.elseif numpix == 1 - ld1 {&mask&.b}[0], [MASK], #1 -.else - .error bilinear_load_mask_8 numpix is unsupported -.endif - prfm PREFETCH_MODE, [MASK, #prefetch_offset] -.endm - -.macro bilinear_load_mask mask_fmt, numpix, mask - bilinear_load_mask_&mask_fmt numpix, mask -.endm - - -/* - * Macros for loading destination pixels into register 'dst0' and 'dst1'. - * Interleave should be done somewhere else. - */ -.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.if numpix == 4 - ld1 {&dst0&.2s, &dst1&.2s}, [OUT] -.elseif numpix == 2 - ld1 {&dst0&.2s}, [OUT] -.elseif numpix == 1 - ld1 {&dst0&.s}[0], [OUT] -.else - .error bilinear_load_dst_8888 numpix is unsupported -.endif - mov &dst01&.d[0], &dst0&.d[0] - mov &dst01&.d[1], &dst1&.d[0] - prfm PREFETCH_MODE, [OUT, #(prefetch_offset * 4)] -.endm - -.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 - bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 - bilinear_load_dst_8888 numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01 - bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01 -.endm - -/* - * Macros for duplicating partially loaded mask to fill entire register. - * We will apply mask to interleaved source pixels, that is - * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) - * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) - * So, we need to duplicate loaded mask into whole register. - * - * For two pixel case - * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) - * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) - * We can do some optimizations for this including last pixel cases. - */ -.macro bilinear_duplicate_mask_x numpix, mask -.endm - -.macro bilinear_duplicate_mask_8 numpix, mask -.if numpix == 4 - dup &mask&.2s, &mask&.s[0] -.elseif numpix == 2 - dup &mask&.4h, &mask&.h[0] -.elseif numpix == 1 - dup &mask&.8b, &mask&.b[0] -.else - .error bilinear_duplicate_mask_8 is unsupported -.endif -.endm - -.macro bilinear_duplicate_mask mask_fmt, numpix, mask - bilinear_duplicate_mask_&mask_fmt numpix, mask -.endm - -/* - * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form. - * Interleave should be done when maks is enabled or operator is 'over'. - */ -.macro bilinear_interleave src0, src1, src01, dst0, dst1, dst01 - vuzp &src0&.8b, &src1&.8b - vuzp &dst0&.8b, &dst1&.8b - vuzp &src0&.8b, &src1&.8b - vuzp &dst0&.8b, &dst1&.8b - mov &src01&.d[1], &src1&.d[0] - mov &src01&.d[0], &src0&.d[0] - mov &dst01&.d[1], &dst1&.d[0] - mov &dst01&.d[0], &dst0&.d[0] -.endm - -.macro bilinear_interleave_src_dst_x_src \ - numpix, src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_x_over \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_x_add \ - numpix, src0, src1, src01, dst0, dst1, dst01 - bilinear_interleave src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_8_src \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_8_over \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst_8_add \ - numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave src0, src1, src01, dst0, dst1, dst01 -.endm - -.macro bilinear_interleave_src_dst \ - mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01 - - bilinear_interleave_src_dst_&mask_fmt&_&op \ - numpix, src0, src1, src01, dst0, dst1, dst01 -.endm - - -/* - * Macros for applying masks to src pixels. (see combine_mask_u() function) - * src, dst should be in interleaved form. - * mask register should be in form (m0, m1, m2, m3). - */ -.macro bilinear_apply_mask_to_src_x \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 -.endm - -.macro bilinear_apply_mask_to_src_8 \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 - - umull &tmp01&.8h, &src0&.8b, &mask&.8b - umull &tmp23&.8h, &src1&.8b, &mask&.8b - /* bubbles */ - urshr &tmp45&.8h, &tmp01&.8h, #8 - urshr &tmp67&.8h, &tmp23&.8h, #8 - /* bubbles */ - raddhn &src0&.8b, &tmp45&.8h, &tmp01&.8h - raddhn &src1&.8b, &tmp67&.8h, &tmp23&.8h - mov &src01&.d[0], &src0&.d[0] - mov &src01&.d[1], &src1&.d[0] -.endm - -.macro bilinear_apply_mask_to_src \ - mask_fmt, numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 - - bilinear_apply_mask_to_src_&mask_fmt \ - numpix, src0, src1, src01, mask, \ - tmp01, tmp23, tmp45, tmp67 -.endm - - -/* - * Macros for combining src and destination pixels. - * Interleave or not is depending on operator 'op'. - */ -.macro bilinear_combine_src \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 -.endm - -.macro bilinear_combine_over \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - dup &tmp8&.2s, &src1&.s[1] - /* bubbles */ - mvn &tmp8&.8b, &tmp8&.8b - /* bubbles */ - umull &tmp01&.8h, &dst0&.8b, &tmp8&.8b - /* bubbles */ - umull &tmp23&.8h, &dst1&.8b, &tmp8&.8b - /* bubbles */ - urshr &tmp45&.8h, &tmp01&.8h, #8 - urshr &tmp67&.8h, &tmp23&.8h, #8 - /* bubbles */ - raddhn &dst0&.8b, &tmp45&.8h, &tmp01&.8h - raddhn &dst1&.8b, &tmp67&.8h, &tmp23&.8h - mov &dst01&.d[0], &dst0&.d[0] - mov &dst01&.d[1], &dst1&.d[0] - /* bubbles */ - uqadd &src0&.8b, &dst0&.8b, &src0&.8b - uqadd &src1&.8b, &dst1&.8b, &src1&.8b - mov &src01&.d[0], &src0&.d[0] - mov &src01&.d[1], &src1&.d[0] -.endm - -.macro bilinear_combine_add \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - uqadd &src0&.8b, &dst0&.8b, &src0&.8b - uqadd &src1&.8b, &dst1&.8b, &src1&.8b - mov &src01&.d[0], &src0&.d[0] - mov &src01&.d[1], &src1&.d[0] -.endm - -.macro bilinear_combine \ - op, numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 - - bilinear_combine_&op \ - numpix, src0, src1, src01, dst0, dst1, dst01, \ - tmp01, tmp23, tmp45, tmp67, tmp8 -.endm - -/* - * Macros for final deinterleaving of destination pixels if needed. - */ -.macro bilinear_deinterleave numpix, dst0, dst1, dst01 - vuzp &dst0&.8b, &dst1&.8b - /* bubbles */ - vuzp &dst0&.8b, &dst1&.8b - mov &dst01&.d[0], &dst0&.d[0] - mov &dst01&.d[1], &dst1&.d[0] -.endm - -.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01 - bilinear_deinterleave numpix, dst0, dst1, dst01 -.endm - -.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01 - bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01 -.endm - - -.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op - bilinear_load_&src_fmt v0, v1, v2 - bilinear_load_mask mask_fmt, 1, v4 - bilinear_load_dst dst_fmt, op, 1, v18, v19, v9 - umull v2.8h, v0.8b, v28.8b - umlal v2.8h, v1.8b, v29.8b - /* 5 cycles bubble */ - ushll v0.4s, v2.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v2.4h, v15.h[0] - umlal2 v0.4s, v2.8h, v15.h[0] - /* 5 cycles bubble */ - bilinear_duplicate_mask mask_fmt, 1, v4 - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - /* 3 cycles bubble */ - xtn v0.8b, v0.8h - /* 1 cycle bubble */ - bilinear_interleave_src_dst \ - mask_fmt, op, 1, v0, v1, v0, v18, v19, v9 - bilinear_apply_mask_to_src \ - mask_fmt, 1, v0, v1, v0, v4, \ - v3, v8, v10, v11 - bilinear_combine \ - op, 1, v0, v1, v0, v18, v19, v9, \ - v3, v8, v10, v11, v5 - bilinear_deinterleave_dst mask_fmt, op, 1, v0, v1, v0 - bilinear_store_&dst_fmt 1, v17, v18 -.endm - -.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op - bilinear_load_and_vertical_interpolate_two_&src_fmt \ - v1, v11, v18, v19, v20, v21, v22, v23 - bilinear_load_mask mask_fmt, 2, v4 - bilinear_load_dst dst_fmt, op, 2, v18, v19, v9 - ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v1.4h, v15.h[0] - umlal2 v0.4s, v1.8h, v15.h[0] - ushll v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v10.4s, v11.4h, v15.h[4] - umlal2 v10.4s, v11.8h, v15.h[4] - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - bilinear_duplicate_mask mask_fmt, 2, v4 - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - xtn v0.8b, v0.8h - bilinear_interleave_src_dst \ - mask_fmt, op, 2, v0, v1, v0, v18, v19, v9 - bilinear_apply_mask_to_src \ - mask_fmt, 2, v0, v1, v0, v4, \ - v3, v8, v10, v11 - bilinear_combine \ - op, 2, v0, v1, v0, v18, v19, v9, \ - v3, v8, v10, v11, v5 - bilinear_deinterleave_dst mask_fmt, op, 2, v0, v1, v0 - bilinear_store_&dst_fmt 2, v16, v17 -.endm - -.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op - bilinear_load_and_vertical_interpolate_four_&src_fmt \ - v1, v11, v4, v5, v6, v7, v22, v23 \ - v3, v9, v16, v17, v20, v21, v18, v19 - prfm PREFETCH_MODE, [TMP1, PF_OFFS] - sub TMP1, TMP1, STRIDE - prfm PREFETCH_MODE, [TMP1, PF_OFFS] - ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v1.4h, v15.h[0] - umlal2 v0.4s, v1.8h, v15.h[0] - ushll v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v10.4s, v11.4h, v15.h[4] - umlal2 v10.4s, v11.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ushll v2.4s, v3.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v2.4s, v3.4h, v15.h[0] - umlal2 v2.4s, v3.8h, v15.h[0] - ushll v8.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v8.4s, v9.4h, v15.h[4] - umlal2 v8.4s, v9.8h, v15.h[4] - add v12.8h, v12.8h, v13.8h - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v2.8h, v8.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - bilinear_load_mask mask_fmt, 4, v4 - bilinear_duplicate_mask mask_fmt, 4, v4 - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - xtn v0.8b, v0.8h - xtn v1.8b, v2.8h - add v12.8h, v12.8h, v13.8h - bilinear_load_dst dst_fmt, op, 4, v2, v3, v21 - bilinear_interleave_src_dst \ - mask_fmt, op, 4, v0, v1, v0, v2, v3, v11 - bilinear_apply_mask_to_src \ - mask_fmt, 4, v0, v1, v0, v4, \ - v6, v8, v9, v10 - bilinear_combine \ - op, 4, v0, v1, v0, v2, v3, v1, \ - v6, v8, v9, v10, v23 - bilinear_deinterleave_dst mask_fmt, op, 4, v0, v1, v0 - bilinear_store_&dst_fmt 4, v6, v7 -.endm - -.set BILINEAR_FLAG_USE_MASK, 1 -.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 - -/* - * Main template macro for generating NEON optimized bilinear scanline functions. - * - * Bilinear scanline generator macro take folling arguments: - * fname - name of the function to generate - * src_fmt - source color format (8888 or 0565) - * dst_fmt - destination color format (8888 or 0565) - * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes - * process_last_pixel - code block that interpolate one pixel and does not - * update horizontal weight - * process_two_pixels - code block that interpolate two pixels and update - * horizontal weight - * process_four_pixels - code block that interpolate four pixels and update - * horizontal weight - * process_pixblock_head - head part of middle loop - * process_pixblock_tail - tail part of middle loop - * process_pixblock_tail_head - tail_head of middle loop - * pixblock_size - number of pixels processed in a single middle loop - * prefetch_distance - prefetch in the source image by that many pixels ahead - */ - -.macro generate_bilinear_scanline_func \ - fname, \ - src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \ - bilinear_process_last_pixel, \ - bilinear_process_two_pixels, \ - bilinear_process_four_pixels, \ - bilinear_process_pixblock_head, \ - bilinear_process_pixblock_tail, \ - bilinear_process_pixblock_tail_head, \ - pixblock_size, \ - prefetch_distance, \ - flags - -pixman_asm_function fname -.if pixblock_size == 8 -.elseif pixblock_size == 4 -.else - .error unsupported pixblock size -.endif - -.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 - OUT .req x0 - TOP .req x1 - BOTTOM .req x2 - WT .req x3 - WWT .req w3 - WB .req x4 - WWB .req w4 - X .req w5 - UX .req w6 - WIDTH .req x7 - TMP1 .req x10 - WTMP1 .req w10 - TMP2 .req x11 - WTMP2 .req w11 - PF_OFFS .req x12 - TMP3 .req x13 - WTMP3 .req w13 - TMP4 .req x14 - WTMP4 .req w14 - STRIDE .req x15 - DUMMY .req x30 - - stp x29, x30, [sp, -16]! - mov x29, sp - sub sp, sp, 112 - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - stp x10, x11, [x29, -80] - stp x12, x13, [x29, -96] - stp x14, x15, [x29, -112] -.else - OUT .req x0 - MASK .req x1 - TOP .req x2 - BOTTOM .req x3 - WT .req x4 - WWT .req w4 - WB .req x5 - WWB .req w5 - X .req w6 - UX .req w7 - WIDTH .req x8 - TMP1 .req x10 - WTMP1 .req w10 - TMP2 .req x11 - WTMP2 .req w11 - PF_OFFS .req x12 - TMP3 .req x13 - WTMP3 .req w13 - TMP4 .req x14 - WTMP4 .req w14 - STRIDE .req x15 - DUMMY .req x30 - - .set prefetch_offset, prefetch_distance - - stp x29, x30, [sp, -16]! - mov x29, sp - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - stp x10, x11, [x29, -80] - stp x12, x13, [x29, -96] - stp x14, x15, [x29, -112] - str x8, [x29, -120] - ldr w8, [x29, 16] - sub sp, sp, 120 -.endif - - mov WTMP1, #prefetch_distance - umull PF_OFFS, WTMP1, UX - - sub STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 300f - - dup v12.8h, X - dup v13.8h, UX - dup v28.8b, WWT - dup v29.8b, WWB - mov v25.d[0], v12.d[1] - mov v26.d[0], v13.d[0] - add v25.4h, v25.4h, v26.4h - mov v12.d[1], v25.d[0] - - /* ensure good destination alignment */ - cmp WIDTH, #1 - blt 100f - tst OUT, #(1 << dst_bpp_shift) - beq 100f - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - bilinear_process_last_pixel - sub WIDTH, WIDTH, #1 -100: - add v13.8h, v13.8h, v13.8h - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - - cmp WIDTH, #2 - blt 100f - tst OUT, #(1 << (dst_bpp_shift + 1)) - beq 100f - bilinear_process_two_pixels - sub WIDTH, WIDTH, #2 -100: -.if pixblock_size == 8 - cmp WIDTH, #4 - blt 100f - tst OUT, #(1 << (dst_bpp_shift + 2)) - beq 100f - bilinear_process_four_pixels - sub WIDTH, WIDTH, #4 -100: -.endif - subs WIDTH, WIDTH, #pixblock_size - blt 100f - asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift) - bilinear_process_pixblock_head - subs WIDTH, WIDTH, #pixblock_size - blt 500f -0: - bilinear_process_pixblock_tail_head - subs WIDTH, WIDTH, #pixblock_size - bge 0b -500: - bilinear_process_pixblock_tail -100: -.if pixblock_size == 8 - tst WIDTH, #4 - beq 200f - bilinear_process_four_pixels -200: -.endif - /* handle the remaining trailing pixels */ - tst WIDTH, #2 - beq 200f - bilinear_process_two_pixels -200: - tst WIDTH, #1 - beq 300f - bilinear_process_last_pixel -300: - -.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x10, x11, [x29, -80] - ldp x12, x13, [x29, -96] - ldp x14, x15, [x29, -112] - mov sp, x29 - ldp x29, x30, [sp], 16 -.else - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x10, x11, [x29, -80] - ldp x12, x13, [x29, -96] - ldp x14, x15, [x29, -112] - ldr x8, [x29, -120] - mov sp, x29 - ldp x29, x30, [sp], 16 -.endif - ret - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WWT - .unreq WB - .unreq WWB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq WTMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0 - .unreq MASK -.endif - -.endfunc - -.endm - -/* src_8888_8_8888 */ -.macro bilinear_src_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 8888, src -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_head - bilinear_src_8888_8_8888_process_four_pixels -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_tail -.endm - -.macro bilinear_src_8888_8_8888_process_pixblock_tail_head - bilinear_src_8888_8_8888_process_pixblock_tail - bilinear_src_8888_8_8888_process_pixblock_head -.endm - -/* src_8888_8_0565 */ -.macro bilinear_src_8888_8_0565_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 0565, src -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_head - bilinear_src_8888_8_0565_process_four_pixels -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_tail -.endm - -.macro bilinear_src_8888_8_0565_process_pixblock_tail_head - bilinear_src_8888_8_0565_process_pixblock_tail - bilinear_src_8888_8_0565_process_pixblock_head -.endm - -/* src_0565_8_x888 */ -.macro bilinear_src_0565_8_x888_process_last_pixel - bilinear_interpolate_last_pixel 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_two_pixels - bilinear_interpolate_two_pixels 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_four_pixels - bilinear_interpolate_four_pixels 0565, 8, 8888, src -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_head - bilinear_src_0565_8_x888_process_four_pixels -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_tail -.endm - -.macro bilinear_src_0565_8_x888_process_pixblock_tail_head - bilinear_src_0565_8_x888_process_pixblock_tail - bilinear_src_0565_8_x888_process_pixblock_head -.endm - -/* src_0565_8_0565 */ -.macro bilinear_src_0565_8_0565_process_last_pixel - bilinear_interpolate_last_pixel 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_two_pixels - bilinear_interpolate_two_pixels 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_four_pixels - bilinear_interpolate_four_pixels 0565, 8, 0565, src -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_head - bilinear_src_0565_8_0565_process_four_pixels -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_tail -.endm - -.macro bilinear_src_0565_8_0565_process_pixblock_tail_head - bilinear_src_0565_8_0565_process_pixblock_tail - bilinear_src_0565_8_0565_process_pixblock_head -.endm - -/* over_8888_8888 */ -.macro bilinear_over_8888_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, x, 8888, over -.endm - -.macro bilinear_over_8888_8888_process_pixblock_head - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - asr WTMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #2 - - ld1 {v22.2s}, [TMP1], STRIDE - ld1 {v23.2s}, [TMP1] - asr WTMP3, X, #16 - add X, X, UX - add TMP3, TOP, TMP3, lsl #2 - umull v8.8h, v22.8b, v28.8b - umlal v8.8h, v23.8b, v29.8b - - ld1 {v22.2s}, [TMP2], STRIDE - ld1 {v23.2s}, [TMP2] - asr WTMP4, X, #16 - add X, X, UX - add TMP4, TOP, TMP4, lsl #2 - umull v9.8h, v22.8b, v28.8b - umlal v9.8h, v23.8b, v29.8b - - ld1 {v22.2s}, [TMP3], STRIDE - ld1 {v23.2s}, [TMP3] - umull v10.8h, v22.8b, v28.8b - umlal v10.8h, v23.8b, v29.8b - - ushll v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v8.4h, v15.h[0] - umlal2 v0.4s, v8.8h, v15.h[0] - - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - ld1 {v16.2s}, [TMP4], STRIDE - ld1 {v17.2s}, [TMP4] - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - umull v11.8h, v16.8b, v28.8b - umlal v11.8h, v17.8b, v29.8b - - ushll v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v1.4s, v9.4h, v15.h[4] - umlal2 v1.4s, v9.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h -.endm - -.macro bilinear_over_8888_8888_process_pixblock_tail - ushll v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v2.4s, v10.4h, v15.h[0] - umlal2 v2.4s, v10.8h, v15.h[0] - ushll v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v3.4s, v11.4h, v15.h[4] - umlal2 v3.4s, v11.8h, v15.h[4] - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - shrn2 v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - xtn v6.8b, v0.8h - xtn v7.8b, v2.8h - ld1 {v2.2s, v3.2s}, [OUT] - prfm PREFETCH_MODE, [OUT, #(prefetch_offset * 4)] - vuzp v6.8b, v7.8b - vuzp v2.8b, v3.8b - vuzp v6.8b, v7.8b - vuzp v2.8b, v3.8b - dup v4.2s, v7.s[1] - mvn v4.8b, v4.8b - umull v11.8h, v2.8b, v4.8b - umull v2.8h, v3.8b, v4.8b - urshr v1.8h, v11.8h, #8 - urshr v10.8h, v2.8h, #8 - raddhn v3.8b, v10.8h, v2.8h - raddhn v2.8b, v1.8h, v11.8h - uqadd v6.8b, v2.8b, v6.8b - uqadd v7.8b, v3.8b, v7.8b - vuzp v6.8b, v7.8b - vuzp v6.8b, v7.8b - add v12.8h, v12.8h, v13.8h - st1 {v6.2s, v7.2s}, [OUT], #16 -.endm - -.macro bilinear_over_8888_8888_process_pixblock_tail_head - ushll v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS - asr WTMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - umlsl v2.4s, v10.4h, v15.h[0] - asr WTMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #2 - umlal2 v2.4s, v10.8h, v15.h[0] - ushll v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - ld1 {v20.2s}, [TMP1], STRIDE - umlsl v3.4s, v11.4h, v15.h[4] - umlal2 v3.4s, v11.8h, v15.h[4] - ld1 {v21.2s}, [TMP1] - umull v8.8h, v20.8b, v28.8b - umlal v8.8h, v21.8b, v29.8b - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ld1 {v22.2s}, [TMP2], STRIDE - shrn2 v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - xtn v6.8b, v0.8h - ld1 {v23.2s}, [TMP2] - umull v9.8h, v22.8b, v28.8b - asr WTMP3, X, #16 - add X, X, UX - add TMP3, TOP, TMP3, lsl #2 - asr WTMP4, X, #16 - add X, X, UX - add TMP4, TOP, TMP4, lsl #2 - umlal v9.8h, v23.8b, v29.8b - xtn v7.8b, v2.8h - ld1 {v2.2s, v3.2s}, [OUT] - prfm PREFETCH_MODE, [OUT, PF_OFFS] - ld1 {v22.2s}, [TMP3], STRIDE - vuzp v6.8b, v7.8b - vuzp v2.8b, v3.8b - vuzp v6.8b, v7.8b - vuzp v2.8b, v3.8b - dup v4.2s, v7.s[1] - ld1 {v23.2s}, [TMP3] - mvn v4.8b, v4.8b - umull v10.8h, v22.8b, v28.8b - umlal v10.8h, v23.8b, v29.8b - umull v11.8h, v2.8b, v4.8b - umull v2.8h, v3.8b, v4.8b - ushll v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v8.4h, v15.h[0] - urshr v1.8h, v11.8h, #8 - umlal2 v0.4s, v8.8h, v15.h[0] - urshr v8.8h, v2.8h, #8 - raddhn v3.8b, v8.8h, v2.8h - raddhn v2.8b, v1.8h, v11.8h - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - ld1 {v16.2s}, [TMP4], STRIDE - uqadd v6.8b, v2.8b, v6.8b - uqadd v7.8b, v3.8b, v7.8b - ld1 {v17.2s}, [TMP4] - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - umull v11.8h, v16.8b, v28.8b - umlal v11.8h, v17.8b, v29.8b - vuzp v6.8b, v7.8b - ushll v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - vuzp v6.8b, v7.8b - umlsl v1.4s, v9.4h, v15.h[4] - add v12.8h, v12.8h, v13.8h - umlal2 v1.4s, v9.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - st1 {v6.2s, v7.2s}, [OUT], #16 -.endm - -/* over_8888_8_8888 */ -.macro bilinear_over_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_four_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, over - bilinear_interpolate_two_pixels 8888, 8, 8888, over -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_head - bilinear_over_8888_8_8888_process_four_pixels -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_tail -.endm - -.macro bilinear_over_8888_8_8888_process_pixblock_tail_head - bilinear_over_8888_8_8888_process_pixblock_tail - bilinear_over_8888_8_8888_process_pixblock_head -.endm - -/* add_8888_8888 */ -.macro bilinear_add_8888_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_four_pixels - bilinear_interpolate_two_pixels 8888, x, 8888, add - bilinear_interpolate_two_pixels 8888, x, 8888, add -.endm - -.macro bilinear_add_8888_8888_process_pixblock_head - bilinear_add_8888_8888_process_four_pixels -.endm - -.macro bilinear_add_8888_8888_process_pixblock_tail -.endm - -.macro bilinear_add_8888_8888_process_pixblock_tail_head - bilinear_add_8888_8888_process_pixblock_tail - bilinear_add_8888_8888_process_pixblock_head -.endm - -/* add_8888_8_8888 */ -.macro bilinear_add_8888_8_8888_process_last_pixel - bilinear_interpolate_last_pixel 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_two_pixels - bilinear_interpolate_two_pixels 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_four_pixels - bilinear_interpolate_four_pixels 8888, 8, 8888, add -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_head - bilinear_add_8888_8_8888_process_four_pixels -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_tail -.endm - -.macro bilinear_add_8888_8_8888_process_pixblock_tail_head - bilinear_add_8888_8_8888_process_pixblock_tail - bilinear_add_8888_8_8888_process_pixblock_head -.endm - - -/* Bilinear scanline functions */ -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_src_8888_8_8888_process_last_pixel, \ - bilinear_src_8888_8_8888_process_two_pixels, \ - bilinear_src_8888_8_8888_process_four_pixels, \ - bilinear_src_8888_8_8888_process_pixblock_head, \ - bilinear_src_8888_8_8888_process_pixblock_tail, \ - bilinear_src_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ - 8888, 0565, 2, 1, \ - bilinear_src_8888_8_0565_process_last_pixel, \ - bilinear_src_8888_8_0565_process_two_pixels, \ - bilinear_src_8888_8_0565_process_four_pixels, \ - bilinear_src_8888_8_0565_process_pixblock_head, \ - bilinear_src_8888_8_0565_process_pixblock_tail, \ - bilinear_src_8888_8_0565_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ - 0565, 8888, 1, 2, \ - bilinear_src_0565_8_x888_process_last_pixel, \ - bilinear_src_0565_8_x888_process_two_pixels, \ - bilinear_src_0565_8_x888_process_four_pixels, \ - bilinear_src_0565_8_x888_process_pixblock_head, \ - bilinear_src_0565_8_x888_process_pixblock_tail, \ - bilinear_src_0565_8_x888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ - 0565, 0565, 1, 1, \ - bilinear_src_0565_8_0565_process_last_pixel, \ - bilinear_src_0565_8_0565_process_two_pixels, \ - bilinear_src_0565_8_0565_process_four_pixels, \ - bilinear_src_0565_8_0565_process_pixblock_head, \ - bilinear_src_0565_8_0565_process_pixblock_tail, \ - bilinear_src_0565_8_0565_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_over_8888_8888_process_last_pixel, \ - bilinear_over_8888_8888_process_two_pixels, \ - bilinear_over_8888_8888_process_four_pixels, \ - bilinear_over_8888_8888_process_pixblock_head, \ - bilinear_over_8888_8888_process_pixblock_tail, \ - bilinear_over_8888_8888_process_pixblock_tail_head, \ - 4, 28, 0 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_over_8888_8_8888_process_last_pixel, \ - bilinear_over_8888_8_8888_process_two_pixels, \ - bilinear_over_8888_8_8888_process_four_pixels, \ - bilinear_over_8888_8_8888_process_pixblock_head, \ - bilinear_over_8888_8_8888_process_pixblock_tail, \ - bilinear_over_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_add_8888_8888_process_last_pixel, \ - bilinear_add_8888_8888_process_two_pixels, \ - bilinear_add_8888_8888_process_four_pixels, \ - bilinear_add_8888_8888_process_pixblock_head, \ - bilinear_add_8888_8888_process_pixblock_tail, \ - bilinear_add_8888_8888_process_pixblock_tail_head, \ - 4, 28, 0 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ - 8888, 8888, 2, 2, \ - bilinear_add_8888_8_8888_process_last_pixel, \ - bilinear_add_8888_8_8888_process_two_pixels, \ - bilinear_add_8888_8_8888_process_four_pixels, \ - bilinear_add_8888_8_8888_process_pixblock_head, \ - bilinear_add_8888_8_8888_process_pixblock_tail, \ - bilinear_add_8888_8_8888_process_pixblock_tail_head, \ - 4, 28, BILINEAR_FLAG_USE_MASK diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm.S b/vendor/pixman/pixman/pixman-arma64-neon-asm.S deleted file mode 100644 index 774d98d38..000000000 --- a/vendor/pixman/pixman/pixman-arma64-neon-asm.S +++ /dev/null @@ -1,3704 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains implementations of NEON optimized pixel processing - * functions. There is no full and detailed tutorial, but some functions - * (those which are exposing some new or interesting features) are - * extensively commented and can be used as examples. - * - * You may want to have a look at the comments for following functions: - * - pixman_composite_over_8888_0565_asm_neon - * - pixman_composite_over_n_8_0565_asm_neon - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - -.text -.arch armv8-a - -.altmacro -.p2align 2 - -#include "pixman-private.h" -#include "pixman-arm-asm.h" -#include "pixman-arma64-neon-asm.h" - -/* Global configuration options and preferences */ - -/* - * The code can optionally make use of unaligned memory accesses to improve - * performance of handling leading/trailing pixels for each scanline. - * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for - * example in linux if unaligned memory accesses are not configured to - * generate.exceptions. - */ -.set RESPECT_STRICT_ALIGNMENT, 1 - -/* - * Set default prefetch type. There is a choice between the following options: - * - * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work - * as NOP to workaround some HW bugs or for whatever other reason) - * - * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where - * advanced prefetch intruduces heavy overhead) - * - * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 - * which can run ARM and NEON instructions simultaneously so that extra ARM - * instructions do not add (many) extra cycles, but improve prefetch efficiency) - * - * Note: some types of function can't support advanced prefetch and fallback - * to simple one (those which handle 24bpp pixels) - */ -.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED - -/* Prefetch distance in pixels for simple prefetch */ -.set PREFETCH_DISTANCE_SIMPLE, 64 - -/* - * Implementation of pixman_composite_over_8888_0565_asm_neon - * - * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and - * performs OVER compositing operation. Function fast_composite_over_8888_0565 - * from pixman-fast-path.c does the same in C and can be used as a reference. - * - * First we need to have some NEON assembly code which can do the actual - * operation on the pixels and provide it to the template macro. - * - * Template macro quite conveniently takes care of emitting all the necessary - * code for memory reading and writing (including quite tricky cases of - * handling unaligned leading/trailing pixels), so we only need to deal with - * the data in NEON registers. - * - * NEON registers allocation in general is recommented to be the following: - * v0, v1, v2, v3 - contain loaded source pixel data - * v4, v5, v6, v7 - contain loaded destination pixels (if they are needed) - * v24, v25, v26, v27 - contain loading mask pixel data (if mask is used) - * v28, v29, v30, v31 - place for storing the result (destination pixels) - * - * As can be seen above, four 64-bit NEON registers are used for keeping - * intermediate pixel data and up to 8 pixels can be processed in one step - * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). - * - * This particular function uses the following registers allocation: - * v0, v1, v2, v3 - contain loaded source pixel data - * v4, v5 - contain loaded destination pixels (they are needed) - * v28, v29 - place for storing the result (destination pixels) - */ - -/* - * Step one. We need to have some code to do some arithmetics on pixel data. - * This is implemented as a pair of macros: '*_head' and '*_tail'. When used - * back-to-back, they take pixel data from {v0, v1, v2, v3} and {v4, v5}, - * perform all the needed calculations and write the result to {v28, v29}. - * The rationale for having two macros and not just one will be explained - * later. In practice, any single monolitic function which does the work can - * be split into two parts in any arbitrary way without affecting correctness. - * - * There is one special trick here too. Common template macro can optionally - * make our life a bit easier by doing R, G, B, A color components - * deinterleaving for 32bpp pixel formats (and this feature is used in - * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that - * instead of having 8 packed pixels in {v0, v1, v2, v3} registers, we - * actually use v0 register for blue channel (a vector of eight 8-bit - * values), v1 register for green, v2 for red and v3 for alpha. This - * simple conversion can be also done with a few NEON instructions: - * - * Packed to planar conversion: // vuzp8 is a wrapper macro - * vuzp8 v0, v1 - * vuzp8 v2, v3 - * vuzp8 v1, v3 - * vuzp8 v0, v2 - * - * Planar to packed conversion: // vzip8 is a wrapper macro - * vzip8 v0, v2 - * vzip8 v1, v3 - * vzip8 v2, v3 - * vzip8 v0, v1 - * - * But pixel can be loaded directly in planar format using LD4 / b NEON - * instruction. It is 1 cycle slower than LD1 / s, so this is not always - * desirable, that's why deinterleaving is optional. - * - * But anyway, here is the code: - */ - -.macro pixman_composite_over_8888_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format - and put data into v6 - red, v7 - green, v30 - blue */ - mov v4.d[1], v5.d[0] - shrn v6.8b, v4.8h, #8 - shrn v7.8b, v4.8h, #3 - sli v4.8h, v4.8h, #5 - sri v6.8b, v6.8b, #5 - mvn v3.8b, v3.8b /* invert source alpha */ - sri v7.8b, v7.8b, #6 - shrn v30.8b, v4.8h, #2 - /* now do alpha blending, storing results in 8-bit planar format - into v20 - red, v23 - green, v22 - blue */ - umull v10.8h, v3.8b, v6.8b - umull v11.8h, v3.8b, v7.8b - umull v12.8h, v3.8b, v30.8b - urshr v17.8h, v10.8h, #8 - urshr v18.8h, v11.8h, #8 - urshr v19.8h, v12.8h, #8 - raddhn v20.8b, v10.8h, v17.8h - raddhn v23.8b, v11.8h, v18.8h - raddhn v22.8b, v12.8h, v19.8h -.endm - -.macro pixman_composite_over_8888_0565_process_pixblock_tail - /* ... continue alpha blending */ - uqadd v17.8b, v2.8b, v20.8b - uqadd v18.8b, v0.8b, v22.8b - uqadd v19.8b, v1.8b, v23.8b - /* convert the result to r5g6b5 and store it into {v14} */ - ushll v14.8h, v17.8b, #7 - sli v14.8h, v14.8h, #1 - ushll v8.8h, v19.8b, #7 - sli v8.8h, v8.8h, #1 - ushll v9.8h, v18.8b, #7 - sli v9.8h, v9.8h, #1 - sri v14.8h, v8.8h, #5 - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -/* - * OK, now we got almost everything that we need. Using the above two - * macros, the work can be done right. But now we want to optimize - * it a bit. ARM Cortex-A8 is an in-order core, and benefits really - * a lot from good code scheduling and software pipelining. - * - * Let's construct some code, which will run in the core main loop. - * Some pseudo-code of the main loop will look like this: - * head - * while (...) { - * tail - * head - * } - * tail - * - * It may look a bit weird, but this setup allows to hide instruction - * latencies better and also utilize dual-issue capability more - * efficiently (make pairs of load-store and ALU instructions). - * - * So what we need now is a '*_tail_head' macro, which will be used - * in the core main loop. A trivial straightforward implementation - * of this macro would look like this: - * - * pixman_composite_over_8888_0565_process_pixblock_tail - * st1 {v28.4h, v29.4h}, [DST_W], #32 - * ld1 {v4.4h, v5.4h}, [DST_R], #16 - * ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [SRC], #32 - * pixman_composite_over_8888_0565_process_pixblock_head - * cache_preload 8, 8 - * - * Now it also got some VLD/VST instructions. We simply can't move from - * processing one block of pixels to the other one with just arithmetics. - * The previously processed data needs to be written to memory and new - * data needs to be fetched. Fortunately, this main loop does not deal - * with partial leading/trailing pixels and can load/store a full block - * of pixels in a bulk. Additionally, destination buffer is already - * 16 bytes aligned here (which is good for performance). - * - * New things here are DST_R, DST_W, SRC and MASK identifiers. These - * are the aliases for ARM registers which are used as pointers for - * accessing data. We maintain separate pointers for reading and writing - * destination buffer (DST_R and DST_W). - * - * Another new thing is 'cache_preload' macro. It is used for prefetching - * data into CPU L2 cache and improve performance when dealing with large - * images which are far larger than cache size. It uses one argument - * (actually two, but they need to be the same here) - number of pixels - * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some - * details about this macro. Moreover, if good performance is needed - * the code from this macro needs to be copied into '*_tail_head' macro - * and mixed with the rest of code for optimal instructions scheduling. - * We are actually doing it below. - * - * Now after all the explanations, here is the optimized code. - * Different instruction streams (originaling from '*_head', '*_tail' - * and 'cache_preload' macro) use different indentation levels for - * better readability. Actually taking the code from one of these - * indentation levels and ignoring a few LD/ST instructions would - * result in exactly the code from '*_head', '*_tail' or 'cache_preload' - * macro! - */ - -#if 1 - -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - uqadd v17.8b, v2.8b, v20.8b - ld1 {v4.4h, v5.4h}, [DST_R], #16 - mov v4.d[1], v5.d[0] - uqadd v18.8b, v0.8b, v22.8b - uqadd v19.8b, v1.8b, v23.8b - shrn v6.8b, v4.8h, #8 - fetch_src_pixblock - shrn v7.8b, v4.8h, #3 - sli v4.8h, v4.8h, #5 - ushll v14.8h, v17.8b, #7 - sli v14.8h, v14.8h, #1 - PF add PF_X, PF_X, #8 - ushll v8.8h, v19.8b, #7 - sli v8.8h, v8.8h, #1 - PF tst PF_CTL, #0xF - sri v6.8b, v6.8b, #5 - PF beq 10f - PF add PF_X, PF_X, #8 -10: - mvn v3.8b, v3.8b - PF beq 10f - PF sub PF_CTL, PF_CTL, #1 -10: - sri v7.8b, v7.8b, #6 - shrn v30.8b, v4.8h, #2 - umull v10.8h, v3.8b, v6.8b - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - umull v11.8h, v3.8b, v7.8b - umull v12.8h, v3.8b, v30.8b - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - sri v14.8h, v8.8h, #5 - PF cmp PF_X, ORIG_W - ushll v9.8h, v18.8b, #7 - sli v9.8h, v9.8h, #1 - urshr v17.8h, v10.8h, #8 - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - urshr v19.8h, v11.8h, #8 - urshr v18.8h, v12.8h, #8 - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: - raddhn v20.8b, v10.8h, v17.8h - raddhn v23.8b, v11.8h, v19.8h - PF ble 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_SRC, #1 -10: - raddhn v22.8b, v12.8h, v18.8h - st1 {v14.8h}, [DST_W], #16 -.endm - -#else - -/* If we did not care much about the performance, we would just use this... */ -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - pixman_composite_over_8888_0565_process_pixblock_tail - st1 {v14.8h}, [DST_W], #16 - ld1 {v4.4h, v4.5h}, [DST_R], #16 - fetch_src_pixblock - pixman_composite_over_8888_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -#endif - -/* - * And now the final part. We are using 'generate_composite_function' macro - * to put all the stuff together. We are specifying the name of the function - * which we want to get, number of bits per pixel for the source, mask and - * destination (0 if unused, like mask in this case). Next come some bit - * flags: - * FLAG_DST_READWRITE - tells that the destination buffer is both read - * and written, for write-only buffer we would use - * FLAG_DST_WRITEONLY flag instead - * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data - * and separate color channels for 32bpp format. - * The next things are: - * - the number of pixels processed per iteration (8 in this case, because - * that's the maximum what can fit into four 64-bit NEON registers). - * - prefetch distance, measured in pixel blocks. In this case it is 5 times - * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal - * prefetch distance can be selected by running some benchmarks. - * - * After that we specify some macros, these are 'default_init', - * 'default_cleanup' here which are empty (but it is possible to have custom - * init/cleanup macros to be able to save/restore some extra NEON registers - * like d8-d15 or do anything else) followed by - * 'pixman_composite_over_8888_0565_process_pixblock_head', - * 'pixman_composite_over_8888_0565_process_pixblock_tail' and - * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' - * which we got implemented above. - * - * The last part is the NEON registers allocation scheme. - */ -generate_composite_function \ - pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format - and put data into v6 - red, v7 - green, v30 - blue */ - mov v4.d[1], v5.d[0] - shrn v6.8b, v4.8h, #8 - shrn v7.8b, v4.8h, #3 - sli v4.8h, v4.8h, #5 - sri v6.8b, v6.8b, #5 - sri v7.8b, v7.8b, #6 - shrn v30.8b, v4.8h, #2 - /* now do alpha blending, storing results in 8-bit planar format - into v20 - red, v23 - green, v22 - blue */ - umull v10.8h, v3.8b, v6.8b - umull v11.8h, v3.8b, v7.8b - umull v12.8h, v3.8b, v30.8b - urshr v13.8h, v10.8h, #8 - urshr v14.8h, v11.8h, #8 - urshr v15.8h, v12.8h, #8 - raddhn v20.8b, v10.8h, v13.8h - raddhn v23.8b, v11.8h, v14.8h - raddhn v22.8b, v12.8h, v15.8h -.endm - -.macro pixman_composite_over_n_0565_process_pixblock_tail - /* ... continue alpha blending */ - uqadd v17.8b, v2.8b, v20.8b - uqadd v18.8b, v0.8b, v22.8b - uqadd v19.8b, v1.8b, v23.8b - /* convert the result to r5g6b5 and store it into {v14} */ - ushll v14.8h, v17.8b, #7 - sli v14.8h, v14.8h, #1 - ushll v8.8h, v19.8b, #7 - sli v8.8h, v8.8h, #1 - ushll v9.8h, v18.8b, #7 - sli v9.8h, v9.8h, #1 - sri v14.8h, v8.8h, #5 - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_0565_process_pixblock_tail_head - pixman_composite_over_n_0565_process_pixblock_tail - ld1 {v4.4h, v5.4h}, [DST_R], #16 - st1 {v14.8h}, [DST_W], #16 - pixman_composite_over_n_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_0565_init - mov v3.s[0], w4 - dup v0.8b, v3.b[0] - dup v1.8b, v3.b[1] - dup v2.8b, v3.b[2] - dup v3.8b, v3.b[3] - mvn v3.8b, v3.8b /* invert source alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_0565_init, \ - default_cleanup, \ - pixman_composite_over_n_0565_process_pixblock_head, \ - pixman_composite_over_n_0565_process_pixblock_tail, \ - pixman_composite_over_n_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_0565_process_pixblock_head - ushll v8.8h, v1.8b, #7 - sli v8.8h, v8.8h, #1 - ushll v14.8h, v2.8b, #7 - sli v14.8h, v14.8h, #1 - ushll v9.8h, v0.8b, #7 - sli v9.8h, v9.8h, #1 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail - sri v14.8h, v8.8h, #5 - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail_head - sri v14.8h, v8.8h, #5 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - fetch_src_pixblock - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] - PF cmp PF_X, ORIG_W - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - ushll v8.8h, v1.8b, #7 - sli v8.8h, v8.8h, #1 - st1 {v14.8h}, [DST_W], #16 - PF ble 10f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 -10: - ushll v14.8h, v2.8b, #7 - sli v14.8h, v14.8h, #1 - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: - ushll v9.8h, v0.8b, #7 - sli v9.8h, v9.8h, #1 -.endm - -generate_composite_function \ - pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_src_0565_8888_process_pixblock_head - mov v0.d[1], v1.d[0] - shrn v30.8b, v0.8h, #8 - shrn v29.8b, v0.8h, #3 - sli v0.8h, v0.8h, #5 - movi v31.8b, #255 - sri v30.8b, v30.8b, #5 - sri v29.8b, v29.8b, #6 - shrn v28.8b, v0.8h, #2 -.endm - -.macro pixman_composite_src_0565_8888_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_src_0565_8888_process_pixblock_tail_head - pixman_composite_src_0565_8888_process_pixblock_tail - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - fetch_src_pixblock - pixman_composite_src_0565_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_process_pixblock_head - uqadd v28.8b, v0.8b, v4.8b - uqadd v29.8b, v1.8b, v5.8b - uqadd v30.8b, v2.8b, v6.8b - uqadd v31.8b, v3.8b, v7.8b -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #32 - PF tst PF_CTL, #0xF - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - PF beq 10f - PF add PF_X, PF_X, #32 - PF sub PF_CTL, PF_CTL, #1 -10: - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF cmp PF_X, ORIG_W - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - PF ble 10f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 -10: - uqadd v28.8b, v0.8b, v4.8b - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - uqadd v29.8b, v1.8b, v5.8b - uqadd v30.8b, v2.8b, v6.8b - uqadd v31.8b, v3.8b, v7.8b -.endm - -generate_composite_function \ - pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF cmp PF_X, ORIG_W - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - PF ble 10f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 -10: - uqadd v28.8b, v0.8b, v4.8b - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - uqadd v29.8b, v1.8b, v5.8b - uqadd v30.8b, v2.8b, v6.8b - uqadd v31.8b, v3.8b, v7.8b -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head - mvn v24.8b, v3.8b /* get inverted alpha */ - /* do alpha blending */ - umull v8.8h, v24.8b, v4.8b - umull v9.8h, v24.8b, v5.8b - umull v10.8h, v24.8b, v6.8b - umull v11.8h, v24.8b, v7.8b -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail - urshr v14.8h, v8.8h, #8 - urshr v15.8h, v9.8h, #8 - urshr v16.8h, v10.8h, #8 - urshr v17.8h, v11.8h, #8 - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - raddhn v30.8b, v16.8h, v10.8h - raddhn v31.8b, v17.8h, v11.8h -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - urshr v14.8h, v8.8h, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - urshr v15.8h, v9.8h, #8 - urshr v16.8h, v10.8h, #8 - urshr v17.8h, v11.8h, #8 - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - PF cmp PF_X, ORIG_W - raddhn v30.8b, v16.8h, v10.8h - raddhn v31.8b, v17.8h, v11.8h - fetch_src_pixblock - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - mvn v22.8b, v3.8b - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v8.8h, v22.8b, v4.8b - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v9.8h, v22.8b, v5.8b - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: - umull v10.8h, v22.8b, v6.8b - PF ble 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - umull v11.8h, v22.8b, v7.8b -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8888_process_pixblock_head - pixman_composite_out_reverse_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_8888_process_pixblock_tail - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - urshr v14.8h, v8.8h, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - urshr v15.8h, v9.8h, #8 - urshr v16.8h, v10.8h, #8 - urshr v17.8h, v11.8h, #8 - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - PF cmp PF_X, ORIG_W - raddhn v30.8b, v16.8h, v10.8h - raddhn v31.8b, v17.8h, v11.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b - fetch_src_pixblock - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - mvn v22.8b, v3.8b - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v8.8h, v22.8b, v4.8b - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v9.8h, v22.8b, v5.8b - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: - umull v10.8h, v22.8b, v6.8b - PF ble 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - umull v11.8h, v22.8b, v7.8b -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_process_pixblock_head - /* deinterleaved source pixels in {v0, v1, v2, v3} */ - /* inverted alpha in {v24} */ - /* destination pixels in {v4, v5, v6, v7} */ - umull v8.8h, v24.8b, v4.8b - umull v9.8h, v24.8b, v5.8b - umull v10.8h, v24.8b, v6.8b - umull v11.8h, v24.8b, v7.8b -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail - urshr v14.8h, v8.8h, #8 - urshr v15.8h, v9.8h, #8 - urshr v16.8h, v10.8h, #8 - urshr v17.8h, v11.8h, #8 - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - raddhn v30.8b, v16.8h, v10.8h - raddhn v31.8b, v17.8h, v11.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -.macro pixman_composite_over_n_8888_process_pixblock_tail_head - urshr v14.8h, v8.8h, #8 - urshr v15.8h, v9.8h, #8 - urshr v16.8h, v10.8h, #8 - urshr v17.8h, v11.8h, #8 - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - raddhn v30.8b, v16.8h, v10.8h - raddhn v31.8b, v17.8h, v11.8h - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - uqadd v28.8b, v0.8b, v28.8b - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0x0F - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b - PF cmp PF_X, ORIG_W - umull v8.8h, v24.8b, v4.8b - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - umull v9.8h, v24.8b, v5.8b - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v10.8h, v24.8b, v6.8b - PF subs PF_CTL, PF_CTL, #0x10 - umull v11.8h, v24.8b, v7.8b - PF ble 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -.macro pixman_composite_over_n_8888_init - mov v3.s[0], w4 - dup v0.8b, v3.b[0] - dup v1.8b, v3.b[1] - dup v2.8b, v3.b[2] - dup v3.8b, v3.b[3] - mvn v24.8b, v3.8b /* get inverted alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head - urshr v14.8h, v8.8h, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - urshr v15.8h, v9.8h, #8 - urshr v12.8h, v10.8h, #8 - urshr v13.8h, v11.8h, #8 - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - PF cmp PF_X, ORIG_W - raddhn v30.8b, v12.8h, v10.8h - raddhn v31.8b, v13.8h, v11.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b - ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_R], #32 - mvn v22.8b, v3.8b - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF blt 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v8.8h, v22.8b, v4.8b - PF blt 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v9.8h, v22.8b, v5.8b - umull v10.8h, v22.8b, v6.8b - PF blt 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - umull v11.8h, v22.8b, v7.8b -.endm - -.macro pixman_composite_over_reverse_n_8888_init - mov v7.s[0], w4 - dup v4.8b, v7.b[0] - dup v5.8b, v7.b[1] - dup v6.8b, v7.b[2] - dup v7.8b, v7.b[3] -.endm - -generate_composite_function \ - pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_reverse_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 4, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8_0565_process_pixblock_head - umull v0.8h, v24.8b, v8.8b /* IN for SRC pixels (part1) */ - umull v1.8h, v24.8b, v9.8b - umull v2.8h, v24.8b, v10.8b - umull v3.8h, v24.8b, v11.8b - mov v4.d[1], v5.d[0] - shrn v25.8b, v4.8h, #8 /* convert DST_R data to 32-bpp (part1) */ - shrn v26.8b, v4.8h, #3 - sli v4.8h, v4.8h, #5 - urshr v17.8h, v0.8h, #8 /* IN for SRC pixels (part2) */ - urshr v18.8h, v1.8h, #8 - urshr v19.8h, v2.8h, #8 - urshr v20.8h, v3.8h, #8 - raddhn v0.8b, v0.8h, v17.8h - raddhn v1.8b, v1.8h, v18.8h - raddhn v2.8b, v2.8h, v19.8h - raddhn v3.8b, v3.8h, v20.8h - sri v25.8b, v25.8b, #5 /* convert DST_R data to 32-bpp (part2) */ - sri v26.8b, v26.8b, #6 - mvn v3.8b, v3.8b - shrn v30.8b, v4.8h, #2 - umull v18.8h, v3.8b, v25.8b /* now do alpha blending */ - umull v19.8h, v3.8b, v26.8b - umull v20.8h, v3.8b, v30.8b -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail - /* 3 cycle bubble (after vmull.u8) */ - urshr v5.8h, v18.8h, #8 - urshr v6.8h, v19.8h, #8 - urshr v7.8h, v20.8h, #8 - raddhn v17.8b, v18.8h, v5.8h - raddhn v19.8b, v19.8h, v6.8h - raddhn v18.8b, v20.8h, v7.8h - uqadd v5.8b, v2.8b, v17.8b - /* 1 cycle bubble */ - uqadd v6.8b, v0.8b, v18.8b - uqadd v7.8b, v1.8b, v19.8b - ushll v14.8h, v5.8b, #7 /* convert to 16bpp */ - sli v14.8h, v14.8h, #1 - ushll v18.8h, v7.8b, #7 - sli v18.8h, v18.8h, #1 - ushll v19.8h, v6.8b, #7 - sli v19.8h, v19.8h, #1 - sri v14.8h, v18.8h, #5 - /* 1 cycle bubble */ - sri v14.8h, v19.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head -#if 0 - ld1 {v4.8h}, [DST_R], #16 - shrn v25.8b, v4.8h, #8 - fetch_mask_pixblock - shrn v26.8b, v4.8h, #3 - fetch_src_pixblock - umull v22.8h, v24.8b, v10.8b - urshr v13.8h, v18.8h, #8 - urshr v11.8h, v19.8h, #8 - urshr v15.8h, v20.8h, #8 - raddhn v17.8b, v18.8h, v13.8h - raddhn v19.8b, v19.8h, v11.8h - raddhn v18.8b, v20.8h, v15.8h - uqadd v17.8b, v2.8b, v17.8b - umull v21.8h, v24.8b, v9.8b - uqadd v18.8b, v0.8b, v18.8b - uqadd v19.8b, v1.8b, v19.8b - ushll v14.8h, v17.8b, #7 - sli v14.8h, v14.8h, #1 - umull v20.8h, v24.8b, v8.8b - ushll v18.8h, v18.8b, #7 - sli v18.8h, v18.8h, #1 - ushll v19.8h, v19.8b, #7 - sli v19.8h, v19.8h, #1 - sri v14.8h, v18.8h, #5 - umull v23.8h, v24.8b, v11.8b - sri v14.8h, v19.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] - - cache_preload 8, 8 - - sli v4.8h, v4.8h, #5 - urshr v16.8h, v20.8h, #8 - urshr v17.8h, v21.8h, #8 - urshr v18.8h, v22.8h, #8 - urshr v19.8h, v23.8h, #8 - raddhn v0.8b, v20.8h, v16.8h - raddhn v1.8b, v21.8h, v17.8h - raddhn v2.8b, v22.8h, v18.8h - raddhn v3.8b, v23.8h, v19.8h - sri v25.8b, v25.8b, #5 - sri v26.8b, v26.8b, #6 - mvn v3.8b, v3.8b - shrn v30.8b, v4.8h, #2 - st1 {v14.8h}, [DST_W], #16 - umull v18.8h, v3.8b, v25.8b - umull v19.8h, v3.8b, v26.8b - umull v20.8h, v3.8b, v30.8b -#else - pixman_composite_over_8888_8_0565_process_pixblock_tail - st1 {v28.4h, v29.4h}, [DST_W], #16 - ld1 {v4.4h, v5.4h}, [DST_R], #16 - fetch_mask_pixblock - fetch_src_pixblock - pixman_composite_over_8888_8_0565_process_pixblock_head -#endif -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -/* - * This function needs a special initialization of solid mask. - * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET - * offset, split into color components and replicated in d8-d11 - * registers. Additionally, this function needs all the NEON registers, - * so it has to save d8-d15 registers which are callee saved according - * to ABI. These registers are restored from 'cleanup' macro. All the - * other NEON registers are caller saved, so can be clobbered freely - * without introducing any problems. - */ -.macro pixman_composite_over_n_8_0565_init - mov v11.s[0], w4 - dup v8.8b, v11.b[0] - dup v9.8b, v11.b[1] - dup v10.8b, v11.b[2] - dup v11.8b, v11.b[3] -.endm - -.macro pixman_composite_over_n_8_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_0565_init, \ - pixman_composite_over_n_8_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_0565_init - mov v24.s[0], w6 - dup v24.8b, v24.b[3] -.endm - -.macro pixman_composite_over_8888_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_0565_init, \ - pixman_composite_over_8888_n_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0565_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail_head - st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DST_W], #32 - fetch_src_pixblock - cache_preload 16, 16 -.endm - -generate_composite_function \ - pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_0565_process_pixblock_head, \ - pixman_composite_src_0565_0565_process_pixblock_tail, \ - pixman_composite_src_0565_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail_head - st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_W], 32 -.endm - -.macro pixman_composite_src_n_8_init - mov v0.s[0], w4 - dup v3.8b, v0.b[0] - dup v2.8b, v0.b[0] - dup v1.8b, v0.b[0] - dup v0.8b, v0.b[0] -.endm - -.macro pixman_composite_src_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8_init, \ - pixman_composite_src_n_8_cleanup, \ - pixman_composite_src_n_8_process_pixblock_head, \ - pixman_composite_src_n_8_process_pixblock_tail, \ - pixman_composite_src_n_8_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail_head - st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DST_W], #32 -.endm - -.macro pixman_composite_src_n_0565_init - mov v0.s[0], w4 - dup v3.4h, v0.h[0] - dup v2.4h, v0.h[0] - dup v1.4h, v0.h[0] - dup v0.4h, v0.h[0] -.endm - -.macro pixman_composite_src_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_0565_init, \ - pixman_composite_src_n_0565_cleanup, \ - pixman_composite_src_n_0565_process_pixblock_head, \ - pixman_composite_src_n_0565_process_pixblock_tail, \ - pixman_composite_src_n_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail_head - st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32 -.endm - -.macro pixman_composite_src_n_8888_init - mov v0.s[0], w4 - dup v3.2s, v0.s[0] - dup v2.2s, v0.s[0] - dup v1.2s, v0.s[0] - dup v0.2s, v0.s[0] -.endm - -.macro pixman_composite_src_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8888_init, \ - pixman_composite_src_n_8888_cleanup, \ - pixman_composite_src_n_8888_process_pixblock_head, \ - pixman_composite_src_n_8888_process_pixblock_tail, \ - pixman_composite_src_n_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail_head - st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32 - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_8888_process_pixblock_head, \ - pixman_composite_src_8888_8888_process_pixblock_tail, \ - pixman_composite_src_8888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_x888_8888_process_pixblock_head - orr v0.8b, v0.8b, v4.8b - orr v1.8b, v1.8b, v4.8b - orr v2.8b, v2.8b, v4.8b - orr v3.8b, v3.8b, v4.8b -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail_head - st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [DST_W], #32 - fetch_src_pixblock - orr v0.8b, v0.8b, v4.8b - orr v1.8b, v1.8b, v4.8b - orr v2.8b, v2.8b, v4.8b - orr v3.8b, v3.8b, v4.8b - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_x888_8888_init - movi v4.2s, #0xff, lsl 24 -.endm - -generate_composite_function \ - pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_x888_8888_init, \ - default_cleanup, \ - pixman_composite_src_x888_8888_process_pixblock_head, \ - pixman_composite_src_x888_8888_process_pixblock_tail, \ - pixman_composite_src_x888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_8888_process_pixblock_head - /* expecting solid source in {v0, v1, v2, v3} */ - /* mask is in v24 (v25, v26, v27 are unused) */ - - /* in */ - umull v8.8h, v24.8b, v0.8b - umull v9.8h, v24.8b, v1.8b - umull v10.8h, v24.8b, v2.8b - umull v11.8h, v24.8b, v3.8b - ursra v8.8h, v8.8h, #8 - ursra v9.8h, v9.8h, #8 - ursra v10.8h, v10.8h, #8 - ursra v11.8h, v11.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8888_process_pixblock_tail - rshrn v28.8b, v8.8h, #8 - rshrn v29.8b, v9.8h, #8 - rshrn v30.8b, v10.8h, #8 - rshrn v31.8b, v11.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head - fetch_mask_pixblock - PF add PF_X, PF_X, #8 - rshrn v28.8b, v8.8h, #8 - PF tst PF_CTL, #0x0F - rshrn v29.8b, v9.8h, #8 - PF beq 10f - PF add PF_X, PF_X, #8 -10: - rshrn v30.8b, v10.8h, #8 - PF beq 10f - PF sub PF_CTL, PF_CTL, #1 -10: - rshrn v31.8b, v11.8h, #8 - PF cmp PF_X, ORIG_W - umull v8.8h, v24.8b, v0.8b - PF lsl DUMMY, PF_X, #mask_bpp_shift - PF prfm PREFETCH_MODE, [PF_MASK, DUMMY] - umull v9.8h, v24.8b, v1.8b - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v10.8h, v24.8b, v2.8b - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v11.8h, v24.8b, v3.8b - PF ble 10f - PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb DUMMY, [PF_MASK, DUMMY] - PF add PF_MASK, PF_MASK, #1 -10: - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - ursra v8.8h, v8.8h, #8 - ursra v9.8h, v9.8h, #8 - ursra v10.8h, v10.8h, #8 - ursra v11.8h, v11.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8888_init - mov v3.s[0], w4 - dup v0.8b, v3.b[0] - dup v1.8b, v3.b[1] - dup v2.8b, v3.b[2] - dup v3.8b, v3.b[3] -.endm - -.macro pixman_composite_src_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_src_n_8_8888_init, \ - pixman_composite_src_n_8_8888_cleanup, \ - pixman_composite_src_n_8_8888_process_pixblock_head, \ - pixman_composite_src_n_8_8888_process_pixblock_tail, \ - pixman_composite_src_n_8_8888_process_pixblock_tail_head, \ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_8_process_pixblock_head - umull v0.8h, v24.8b, v16.8b - umull v1.8h, v25.8b, v16.8b - umull v2.8h, v26.8b, v16.8b - umull v3.8h, v27.8b, v16.8b - ursra v0.8h, v0.8h, #8 - ursra v1.8h, v1.8h, #8 - ursra v2.8h, v2.8h, #8 - ursra v3.8h, v3.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8_process_pixblock_tail - rshrn v28.8b, v0.8h, #8 - rshrn v29.8b, v1.8h, #8 - rshrn v30.8b, v2.8h, #8 - rshrn v31.8b, v3.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8_process_pixblock_tail_head - fetch_mask_pixblock - PF add PF_X, PF_X, #8 - rshrn v28.8b, v0.8h, #8 - PF tst PF_CTL, #0x0F - rshrn v29.8b, v1.8h, #8 - PF beq 10f - PF add PF_X, PF_X, #8 -10: - rshrn v30.8b, v2.8h, #8 - PF beq 10f - PF sub PF_CTL, PF_CTL, #1 -10: - rshrn v31.8b, v3.8h, #8 - PF cmp PF_X, ORIG_W - umull v0.8h, v24.8b, v16.8b - PF lsl DUMMY, PF_X, mask_bpp_shift - PF prfm PREFETCH_MODE, [PF_MASK, DUMMY] - umull v1.8h, v25.8b, v16.8b - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v2.8h, v26.8b, v16.8b - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v3.8h, v27.8b, v16.8b - PF ble 10f - PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb DUMMY, [PF_MASK, DUMMY] - PF add PF_MASK, PF_MASK, #1 -10: - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - ursra v0.8h, v0.8h, #8 - ursra v1.8h, v1.8h, #8 - ursra v2.8h, v2.8h, #8 - ursra v3.8h, v3.8h, #8 -.endm - -.macro pixman_composite_src_n_8_8_init - mov v16.s[0], w4 - dup v16.8b, v16.b[3] -.endm - -.macro pixman_composite_src_n_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_src_n_8_8_init, \ - pixman_composite_src_n_8_8_cleanup, \ - pixman_composite_src_n_8_8_process_pixblock_head, \ - pixman_composite_src_n_8_8_process_pixblock_tail, \ - pixman_composite_src_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8888_process_pixblock_head - /* expecting deinterleaved source data in {v8, v9, v10, v11} */ - /* v8 - blue, v9 - green, v10 - red, v11 - alpha */ - /* and destination data in {v4, v5, v6, v7} */ - /* mask is in v24 (v25, v26, v27 are unused) */ - - /* in */ - umull v12.8h, v24.8b, v8.8b - umull v13.8h, v24.8b, v9.8b - umull v14.8h, v24.8b, v10.8b - umull v15.8h, v24.8b, v11.8b - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v13.8h, #8 - urshr v18.8h, v14.8h, #8 - urshr v19.8h, v15.8h, #8 - raddhn v0.8b, v12.8h, v16.8h - raddhn v1.8b, v13.8h, v17.8h - raddhn v2.8b, v14.8h, v18.8h - raddhn v3.8b, v15.8h, v19.8h - mvn v25.8b, v3.8b /* get inverted alpha */ - /* source: v0 - blue, v1 - green, v2 - red, v3 - alpha */ - /* destination: v4 - blue, v5 - green, v6 - red, v7 - alpha */ - /* now do alpha blending */ - umull v12.8h, v25.8b, v4.8b - umull v13.8h, v25.8b, v5.8b - umull v14.8h, v25.8b, v6.8b - umull v15.8h, v25.8b, v7.8b -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v13.8h, #8 - urshr v18.8h, v14.8h, #8 - urshr v19.8h, v15.8h, #8 - raddhn v28.8b, v16.8h, v12.8h - raddhn v29.8b, v17.8h, v13.8h - raddhn v30.8b, v18.8h, v14.8h - raddhn v31.8b, v19.8h, v15.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head - urshr v16.8h, v12.8h, #8 - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - urshr v17.8h, v13.8h, #8 - fetch_mask_pixblock - urshr v18.8h, v14.8h, #8 - PF add PF_X, PF_X, #8 - urshr v19.8h, v15.8h, #8 - PF tst PF_CTL, #0x0F - raddhn v28.8b, v16.8h, v12.8h - PF beq 10f - PF add PF_X, PF_X, #8 -10: - raddhn v29.8b, v17.8h, v13.8h - PF beq 10f - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v30.8b, v18.8h, v14.8h - PF cmp PF_X, ORIG_W - raddhn v31.8b, v19.8h, v15.8h - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] - umull v16.8h, v24.8b, v8.8b - PF lsl DUMMY, PF_X, #mask_bpp_shift - PF prfm PREFETCH_MODE, [PF_MASK, DUMMY] - umull v17.8h, v24.8b, v9.8b - PF ble 10f - PF sub PF_X, PF_X, ORIG_W -10: - umull v18.8h, v24.8b, v10.8b - PF ble 10f - PF subs PF_CTL, PF_CTL, #0x10 -10: - umull v19.8h, v24.8b, v11.8b - PF ble 10f - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -10: - uqadd v28.8b, v0.8b, v28.8b - PF ble 10f - PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb DUMMY, [PF_MASK, DUMMY] - PF add PF_MASK, PF_MASK, #1 -10: - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b - urshr v12.8h, v16.8h, #8 - urshr v13.8h, v17.8h, #8 - urshr v14.8h, v18.8h, #8 - urshr v15.8h, v19.8h, #8 - raddhn v0.8b, v16.8h, v12.8h - raddhn v1.8b, v17.8h, v13.8h - raddhn v2.8b, v18.8h, v14.8h - raddhn v3.8b, v19.8h, v15.8h - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - mvn v25.8b, v3.8b - umull v12.8h, v25.8b, v4.8b - umull v13.8h, v25.8b, v5.8b - umull v14.8h, v25.8b, v6.8b - umull v15.8h, v25.8b, v7.8b -.endm - -.macro pixman_composite_over_n_8_8888_init - mov v11.s[0], w4 - dup v8.8b, v11.b[0] - dup v9.8b, v11.b[1] - dup v10.8b, v11.b[2] - dup v11.8b, v11.b[3] -.endm - -.macro pixman_composite_over_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8888_init, \ - pixman_composite_over_n_8_8888_cleanup, \ - pixman_composite_over_n_8_8888_process_pixblock_head, \ - pixman_composite_over_n_8_8888_process_pixblock_tail, \ - pixman_composite_over_n_8_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8_process_pixblock_head - umull v0.8h, v24.8b, v8.8b - umull v1.8h, v25.8b, v8.8b - umull v2.8h, v26.8b, v8.8b - umull v3.8h, v27.8b, v8.8b - urshr v10.8h, v0.8h, #8 - urshr v11.8h, v1.8h, #8 - urshr v12.8h, v2.8h, #8 - urshr v13.8h, v3.8h, #8 - raddhn v0.8b, v0.8h, v10.8h - raddhn v1.8b, v1.8h, v11.8h - raddhn v2.8b, v2.8h, v12.8h - raddhn v3.8b, v3.8h, v13.8h - mvn v24.8b, v0.8b - mvn v25.8b, v1.8b - mvn v26.8b, v2.8b - mvn v27.8b, v3.8b - umull v10.8h, v24.8b, v4.8b - umull v11.8h, v25.8b, v5.8b - umull v12.8h, v26.8b, v6.8b - umull v13.8h, v27.8b, v7.8b -.endm - -.macro pixman_composite_over_n_8_8_process_pixblock_tail - urshr v14.8h, v10.8h, #8 - urshr v15.8h, v11.8h, #8 - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v13.8h, #8 - raddhn v28.8b, v14.8h, v10.8h - raddhn v29.8b, v15.8h, v11.8h - raddhn v30.8b, v16.8h, v12.8h - raddhn v31.8b, v17.8h, v13.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8_process_pixblock_tail_head - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - pixman_composite_over_n_8_8_process_pixblock_tail - fetch_mask_pixblock - cache_preload 32, 32 - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - pixman_composite_over_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8_init - mov v8.s[0], w4 - dup v8.8b, v8.b[3] -.endm - -.macro pixman_composite_over_n_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8_init, \ - pixman_composite_over_n_8_8_cleanup, \ - pixman_composite_over_n_8_8_process_pixblock_head, \ - pixman_composite_over_n_8_8_process_pixblock_tail, \ - pixman_composite_over_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {v8, v9, v10, v11} - * dest in {v4, v5, v6, v7 } - * mask in {v24, v25, v26, v27} - * output: updated src in {v0, v1, v2, v3 } - * updated mask in {v24, v25, v26, v3 } - */ - umull v0.8h, v24.8b, v8.8b - umull v1.8h, v25.8b, v9.8b - umull v2.8h, v26.8b, v10.8b - umull v3.8h, v27.8b, v11.8b - umull v12.8h, v11.8b, v25.8b - umull v13.8h, v11.8b, v24.8b - umull v14.8h, v11.8b, v26.8b - urshr v15.8h, v0.8h, #8 - urshr v16.8h, v1.8h, #8 - urshr v17.8h, v2.8h, #8 - raddhn v0.8b, v0.8h, v15.8h - raddhn v1.8b, v1.8h, v16.8h - raddhn v2.8b, v2.8h, v17.8h - urshr v15.8h, v13.8h, #8 - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v14.8h, #8 - urshr v18.8h, v3.8h, #8 - raddhn v24.8b, v13.8h, v15.8h - raddhn v25.8b, v12.8h, v16.8h - raddhn v26.8b, v14.8h, v17.8h - raddhn v3.8b, v3.8h, v18.8h - /* - * 'combine_over_ca' replacement - * - * output: updated dest in {v28, v29, v30, v31} - */ - mvn v24.8b, v24.8b - mvn v25.8b, v25.8b - mvn v26.8b, v26.8b - mvn v27.8b, v3.8b - umull v12.8h, v24.8b, v4.8b - umull v13.8h, v25.8b, v5.8b - umull v14.8h, v26.8b, v6.8b - umull v15.8h, v27.8b, v7.8b -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v13.8h, #8 - urshr v18.8h, v14.8h, #8 - urshr v19.8h, v15.8h, #8 - raddhn v28.8b, v16.8h, v12.8h - raddhn v29.8b, v17.8h, v13.8h - raddhn v30.8b, v18.8h, v14.8h - raddhn v31.8b, v19.8h, v15.8h - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - urshr v16.8h, v12.8h, #8 - urshr v17.8h, v13.8h, #8 - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - urshr v18.8h, v14.8h, #8 - urshr v19.8h, v15.8h, #8 - raddhn v28.8b, v16.8h, v12.8h - raddhn v29.8b, v17.8h, v13.8h - raddhn v30.8b, v18.8h, v14.8h - raddhn v31.8b, v19.8h, v15.8h - fetch_mask_pixblock - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b - cache_preload 8, 8 - pixman_composite_over_n_8888_8888_ca_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_init - mov v13.s[0], w4 - dup v8.8b, v13.b[0] - dup v9.8b, v13.b[1] - dup v10.8b, v13.b[2] - dup v11.8b, v13.b[3] -.endm - -.macro pixman_composite_over_n_8888_8888_ca_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_8888_ca_init, \ - pixman_composite_over_n_8888_8888_ca_cleanup, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {v8, v9, v10, v11} [B, G, R, A] - * mask in {v24, v25, v26} [B, G, R] - * output: updated src in {v0, v1, v2 } [B, G, R] - * updated mask in {v24, v25, v26} [B, G, R] - */ - umull v0.8h, v24.8b, v8.8b - umull v1.8h, v25.8b, v9.8b - umull v2.8h, v26.8b, v10.8b - umull v12.8h, v11.8b, v24.8b - umull v13.8h, v11.8b, v25.8b - umull v14.8h, v11.8b, v26.8b - urshr v15.8h, v0.8h, #8 - urshr v16.8h, v1.8h, #8 - urshr v17.8h, v2.8h, #8 - raddhn v0.8b, v0.8h, v15.8h - raddhn v1.8b, v1.8h, v16.8h - raddhn v2.8b, v2.8h, v17.8h - urshr v19.8h, v12.8h, #8 - urshr v20.8h, v13.8h, #8 - urshr v21.8h, v14.8h, #8 - raddhn v24.8b, v12.8h, v19.8h - raddhn v25.8b, v13.8h, v20.8h - /* - * convert 8 r5g6b5 pixel data from {v4} to planar 8-bit format - * and put data into v16 - blue, v17 - green, v18 - red - */ - mov v4.d[1], v5.d[0] - shrn v17.8b, v4.8h, #3 - shrn v18.8b, v4.8h, #8 - raddhn v26.8b, v14.8h, v21.8h - sli v4.8h, v4.8h, #5 - sri v18.8b, v18.8b, #5 - sri v17.8b, v17.8b, #6 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in v16 - blue, v17 - green, v18 - red - */ - mvn v24.8b, v24.8b - mvn v25.8b, v25.8b - shrn v16.8b, v4.8h, #2 - mvn v26.8b, v26.8b - umull v5.8h, v16.8b, v24.8b - umull v6.8h, v17.8b, v25.8b - umull v7.8h, v18.8b, v26.8b -.endm - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - urshr v13.8h, v5.8h, #8 - urshr v14.8h, v6.8h, #8 - urshr v15.8h, v7.8h, #8 - raddhn v16.8b, v13.8h, v5.8h - raddhn v17.8b, v14.8h, v6.8h - raddhn v18.8b, v15.8h, v7.8h - uqadd v16.8b, v0.8b, v16.8b - uqadd v17.8b, v1.8b, v17.8b - uqadd v18.8b, v2.8b, v18.8b - /* - * convert the results in v16, v17, v18 to r5g6b5 and store - * them into {v14} - */ - ushll v14.8h, v18.8b, #7 - sli v14.8h, v14.8h, #1 - ushll v12.8h, v17.8b, #7 - sli v12.8h, v12.8h, #1 - ushll v13.8h, v16.8b, #7 - sli v13.8h, v13.8h, #1 - sri v14.8h, v12.8h, #5 - sri v14.8h, v13.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head - fetch_mask_pixblock - urshr v13.8h, v5.8h, #8 - urshr v14.8h, v6.8h, #8 - ld1 {v4.8h}, [DST_R], #16 - urshr v15.8h, v7.8h, #8 - raddhn v16.8b, v13.8h, v5.8h - raddhn v17.8b, v14.8h, v6.8h - raddhn v18.8b, v15.8h, v7.8h - mov v5.d[0], v4.d[1] - /* process_pixblock_head */ - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {v8, v9, v10, v11} [B, G, R, A] - * mask in {v24, v25, v26} [B, G, R] - * output: updated src in {v0, v1, v2 } [B, G, R] - * updated mask in {v24, v25, v26} [B, G, R] - */ - uqadd v16.8b, v0.8b, v16.8b - uqadd v17.8b, v1.8b, v17.8b - uqadd v18.8b, v2.8b, v18.8b - umull v0.8h, v24.8b, v8.8b - umull v1.8h, v25.8b, v9.8b - umull v2.8h, v26.8b, v10.8b - /* - * convert the result in v16, v17, v18 to r5g6b5 and store - * it into {v14} - */ - ushll v14.8h, v18.8b, #7 - sli v14.8h, v14.8h, #1 - ushll v18.8h, v16.8b, #7 - sli v18.8h, v18.8h, #1 - ushll v19.8h, v17.8b, #7 - sli v19.8h, v19.8h, #1 - umull v12.8h, v11.8b, v24.8b - sri v14.8h, v19.8h, #5 - umull v13.8h, v11.8b, v25.8b - umull v15.8h, v11.8b, v26.8b - sri v14.8h, v18.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] - cache_preload 8, 8 - urshr v16.8h, v0.8h, #8 - urshr v17.8h, v1.8h, #8 - urshr v18.8h, v2.8h, #8 - raddhn v0.8b, v0.8h, v16.8h - raddhn v1.8b, v1.8h, v17.8h - raddhn v2.8b, v2.8h, v18.8h - urshr v19.8h, v12.8h, #8 - urshr v20.8h, v13.8h, #8 - urshr v21.8h, v15.8h, #8 - raddhn v24.8b, v12.8h, v19.8h - raddhn v25.8b, v13.8h, v20.8h - /* - * convert 8 r5g6b5 pixel data from {v4, v5} to planar - * 8-bit format and put data into v16 - blue, v17 - green, - * v18 - red - */ - mov v4.d[1], v5.d[0] - shrn v17.8b, v4.8h, #3 - shrn v18.8b, v4.8h, #8 - raddhn v26.8b, v15.8h, v21.8h - sli v4.8h, v4.8h, #5 - sri v17.8b, v17.8b, #6 - sri v18.8b, v18.8b, #5 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in v16 - blue, v17 - green, v18 - red - */ - mvn v24.8b, v24.8b - mvn v25.8b, v25.8b - shrn v16.8b, v4.8h, #2 - mvn v26.8b, v26.8b - umull v5.8h, v16.8b, v24.8b - umull v6.8h, v17.8b, v25.8b - umull v7.8h, v18.8b, v26.8b - st1 {v14.8h}, [DST_W], #16 -.endm - -.macro pixman_composite_over_n_8888_0565_ca_init - mov v13.s[0], w4 - dup v8.8b, v13.b[0] - dup v9.8b, v13.b[1] - dup v10.8b, v13.b[2] - dup v11.8b, v13.b[3] -.endm - -.macro pixman_composite_over_n_8888_0565_ca_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_0565_ca_init, \ - pixman_composite_over_n_8888_0565_ca_cleanup, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_in_n_8_process_pixblock_head - /* expecting source data in {v0, v1, v2, v3} */ - /* and destination data in {v4, v5, v6, v7} */ - umull v8.8h, v4.8b, v3.8b - umull v9.8h, v5.8b, v3.8b - umull v10.8h, v6.8b, v3.8b - umull v11.8h, v7.8b, v3.8b -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail - urshr v14.8h, v8.8h, #8 - urshr v15.8h, v9.8h, #8 - urshr v12.8h, v10.8h, #8 - urshr v13.8h, v11.8h, #8 - raddhn v28.8b, v8.8h, v14.8h - raddhn v29.8b, v9.8h, v15.8h - raddhn v30.8b, v10.8h, v12.8h - raddhn v31.8b, v11.8h, v13.8h -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail_head - pixman_composite_in_n_8_process_pixblock_tail - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - cache_preload 32, 32 - pixman_composite_in_n_8_process_pixblock_head - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -.macro pixman_composite_in_n_8_init - mov v3.s[0], w4 - dup v3.8b, v3.b[3] -.endm - -.macro pixman_composite_in_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_in_n_8_init, \ - pixman_composite_in_n_8_cleanup, \ - pixman_composite_in_n_8_process_pixblock_head, \ - pixman_composite_in_n_8_process_pixblock_tail, \ - pixman_composite_in_n_8_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -.macro pixman_composite_add_n_8_8_process_pixblock_head - /* expecting source data in {v8, v9, v10, v11} */ - /* v8 - blue, v9 - green, v10 - red, v11 - alpha */ - /* and destination data in {v4, v5, v6, v7} */ - /* mask is in v24, v25, v26, v27 */ - umull v0.8h, v24.8b, v11.8b - umull v1.8h, v25.8b, v11.8b - umull v2.8h, v26.8b, v11.8b - umull v3.8h, v27.8b, v11.8b - urshr v12.8h, v0.8h, #8 - urshr v13.8h, v1.8h, #8 - urshr v14.8h, v2.8h, #8 - urshr v15.8h, v3.8h, #8 - raddhn v0.8b, v0.8h, v12.8h - raddhn v1.8b, v1.8h, v13.8h - raddhn v2.8b, v2.8h, v14.8h - raddhn v3.8b, v3.8h, v15.8h - uqadd v28.8b, v0.8b, v4.8b - uqadd v29.8b, v1.8b, v5.8b - uqadd v30.8b, v2.8b, v6.8b - uqadd v31.8b, v3.8b, v7.8b -.endm - -.macro pixman_composite_add_n_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_n_8_8_process_pixblock_tail_head - pixman_composite_add_n_8_8_process_pixblock_tail - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - fetch_mask_pixblock - cache_preload 32, 32 - pixman_composite_add_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_n_8_8_init - mov v11.s[0], w4 - dup v11.8b, v11.b[3] -.endm - -.macro pixman_composite_add_n_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8_init, \ - pixman_composite_add_n_8_8_cleanup, \ - pixman_composite_add_n_8_8_process_pixblock_head, \ - pixman_composite_add_n_8_8_process_pixblock_tail, \ - pixman_composite_add_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_8_process_pixblock_head - /* expecting source data in {v0, v1, v2, v3} */ - /* destination data in {v4, v5, v6, v7} */ - /* mask in {v24, v25, v26, v27} */ - umull v8.8h, v24.8b, v0.8b - umull v9.8h, v25.8b, v1.8b - umull v10.8h, v26.8b, v2.8b - umull v11.8h, v27.8b, v3.8b - urshr v0.8h, v8.8h, #8 - urshr v1.8h, v9.8h, #8 - urshr v12.8h, v10.8h, #8 - urshr v13.8h, v11.8h, #8 - raddhn v0.8b, v0.8h, v8.8h - raddhn v1.8b, v1.8h, v9.8h - raddhn v2.8b, v12.8h, v10.8h - raddhn v3.8b, v13.8h, v11.8h - uqadd v28.8b, v0.8b, v4.8b - uqadd v29.8b, v1.8b, v5.8b - uqadd v30.8b, v2.8b, v6.8b - uqadd v31.8b, v3.8b, v7.8b -.endm - -.macro pixman_composite_add_8_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_8_8_8_process_pixblock_tail_head - pixman_composite_add_8_8_8_process_pixblock_tail - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - fetch_mask_pixblock - fetch_src_pixblock - cache_preload 32, 32 - pixman_composite_add_8_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_8_8_8_init -.endm - -.macro pixman_composite_add_8_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8_8_8_init, \ - pixman_composite_add_8_8_8_cleanup, \ - pixman_composite_add_8_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_head - /* expecting source data in {v0, v1, v2, v3} */ - /* destination data in {v4, v5, v6, v7} */ - /* mask in {v24, v25, v26, v27} */ - umull v8.8h, v27.8b, v0.8b - umull v9.8h, v27.8b, v1.8b - umull v10.8h, v27.8b, v2.8b - umull v11.8h, v27.8b, v3.8b - /* 1 cycle bubble */ - ursra v8.8h, v8.8h, #8 - ursra v9.8h, v9.8h, #8 - ursra v10.8h, v10.8h, #8 - ursra v11.8h, v11.8h, #8 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail - /* 2 cycle bubble */ - rshrn v28.8b, v8.8h, #8 - rshrn v29.8b, v9.8h, #8 - rshrn v30.8b, v10.8h, #8 - rshrn v31.8b, v11.8h, #8 - uqadd v28.8b, v4.8b, v28.8b - uqadd v29.8b, v5.8b, v29.8b - uqadd v30.8b, v6.8b, v30.8b - uqadd v31.8b, v7.8b, v31.8b -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - rshrn v28.8b, v8.8h, #8 - fetch_mask_pixblock - rshrn v29.8b, v9.8h, #8 - umull v8.8h, v27.8b, v0.8b - rshrn v30.8b, v10.8h, #8 - umull v9.8h, v27.8b, v1.8b - rshrn v31.8b, v11.8h, #8 - umull v10.8h, v27.8b, v2.8b - umull v11.8h, v27.8b, v3.8b - uqadd v28.8b, v4.8b, v28.8b - uqadd v29.8b, v5.8b, v29.8b - uqadd v30.8b, v6.8b, v30.8b - uqadd v31.8b, v7.8b, v31.8b - ursra v8.8h, v8.8h, #8 - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - ursra v9.8h, v9.8h, #8 - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - ursra v10.8h, v10.8h, #8 - - cache_preload 8, 8 - - ursra v11.8h, v11.8h, #8 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -generate_composite_function \ - pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_n_8_8888_init - mov v3.s[0], w4 - dup v0.8b, v3.b[0] - dup v1.8b, v3.b[1] - dup v2.8b, v3.b[2] - dup v3.8b, v3.b[3] -.endm - -.macro pixman_composite_add_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8888_init, \ - pixman_composite_add_n_8_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_8888_n_8888_init - mov v27.s[0], w6 - dup v27.8b, v27.b[3] -.endm - -.macro pixman_composite_add_8888_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8888_n_8888_init, \ - pixman_composite_add_8888_n_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - /* expecting source data in {v0, v1, v2, v3} */ - /* destination data in {v4, v5, v6, v7} */ - /* solid mask is in v15 */ - - /* 'in' */ - umull v11.8h, v15.8b, v3.8b - umull v10.8h, v15.8b, v2.8b - umull v9.8h, v15.8b, v1.8b - umull v8.8h, v15.8b, v0.8b - urshr v16.8h, v11.8h, #8 - urshr v14.8h, v10.8h, #8 - urshr v13.8h, v9.8h, #8 - urshr v12.8h, v8.8h, #8 - raddhn v3.8b, v11.8h, v16.8h - raddhn v2.8b, v10.8h, v14.8h - raddhn v1.8b, v9.8h, v13.8h - raddhn v0.8b, v8.8h, v12.8h - mvn v24.8b, v3.8b /* get inverted alpha */ - /* now do alpha blending */ - umull v8.8h, v24.8b, v4.8b - umull v9.8h, v24.8b, v5.8b - umull v10.8h, v24.8b, v6.8b - umull v11.8h, v24.8b, v7.8b -.endm - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - urshr v16.8h, v8.8h, #8 - urshr v17.8h, v9.8h, #8 - urshr v18.8h, v10.8h, #8 - urshr v19.8h, v11.8h, #8 - raddhn v28.8b, v16.8h, v8.8h - raddhn v29.8b, v17.8h, v9.8h - raddhn v30.8b, v18.8h, v10.8h - raddhn v31.8b, v19.8h, v11.8h -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_8888_process_pixblock_head - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - uqadd v28.8b, v0.8b, v28.8b - uqadd v29.8b, v1.8b, v29.8b - uqadd v30.8b, v2.8b, v30.8b - uqadd v31.8b, v3.8b, v31.8b -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - pixman_composite_over_8888_n_8888_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -.macro pixman_composite_over_8888_n_8888_init - mov v15.s[0], w6 - dup v15.8b, v15.b[3] -.endm - -.macro pixman_composite_over_8888_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_8888_init, \ - pixman_composite_over_8888_n_8888_cleanup, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0888_process_pixblock_head -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail_head - st3 {v0.8b, v1.8b, v2.8b}, [DST_W], #24 - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0888_process_pixblock_head, \ - pixman_composite_src_0888_0888_process_pixblock_tail, \ - pixman_composite_src_0888_0888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_head - mov v31.8b, v2.8b - mov v2.8b, v0.8b - mov v0.8b, v31.8b -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head - st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_W], #32 - fetch_src_pixblock - mov v31.8b, v2.8b - mov v2.8b, v0.8b - mov v0.8b, v31.8b - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_0888_8888_rev_init - eor v3.8b, v3.8b, v3.8b -.endm - -generate_composite_function \ - pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_0888_8888_rev_init, \ - default_cleanup, \ - pixman_composite_src_0888_8888_rev_process_pixblock_head, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_head - ushll v8.8h, v1.8b, #7 - sli v8.8h, v8.8h, #1 - ushll v9.8h, v2.8b, #7 - sli v9.8h, v9.8h, #1 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail - ushll v14.8h, v0.8b, #7 - sli v14.8h, v14.8h, #1 - sri v14.8h, v8.8h, #5 - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head - ushll v14.8h, v0.8b, #7 - sli v14.8h, v14.8h, #1 - fetch_src_pixblock - sri v14.8h, v8.8h, #5 - sri v14.8h, v9.8h, #11 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] - ushll v8.8h, v1.8b, #7 - sli v8.8h, v8.8h, #1 - st1 {v14.8h}, [DST_W], #16 - ushll v9.8h, v2.8b, #7 - sli v9.8h, v9.8h, #1 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0565_rev_process_pixblock_head, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_head - umull v8.8h, v3.8b, v0.8b - umull v9.8h, v3.8b, v1.8b - umull v10.8h, v3.8b, v2.8b -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail - urshr v11.8h, v8.8h, #8 - mov v30.8b, v31.8b - mov v31.8b, v3.8b - mov v3.8b, v30.8b - urshr v12.8h, v9.8h, #8 - urshr v13.8h, v10.8h, #8 - raddhn v30.8b, v11.8h, v8.8h - raddhn v29.8b, v12.8h, v9.8h - raddhn v28.8b, v13.8h, v10.8h -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head - urshr v11.8h, v8.8h, #8 - mov v30.8b, v31.8b - mov v31.8b, v3.8b - mov v3.8b, v31.8b - urshr v12.8h, v9.8h, #8 - urshr v13.8h, v10.8h, #8 - fetch_src_pixblock - raddhn v30.8b, v11.8h, v8.8h - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v29.8b, v12.8h, v9.8h - raddhn v28.8b, v13.8h, v10.8h - umull v8.8h, v3.8b, v0.8b - umull v9.8h, v3.8b, v1.8b - umull v10.8h, v3.8b, v2.8b - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF cmp PF_X, ORIG_W - PF lsl DUMMY, PF_X, src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - PF ble 10f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: -.endm - -generate_composite_function \ - pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_pixbuf_8888_process_pixblock_head, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head - umull v8.8h, v3.8b, v0.8b - umull v9.8h, v3.8b, v1.8b - umull v10.8h, v3.8b, v2.8b -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail - urshr v11.8h, v8.8h, #8 - mov v30.8b, v31.8b - mov v31.8b, v3.8b - mov v3.8b, v30.8b - urshr v12.8h, v9.8h, #8 - urshr v13.8h, v10.8h, #8 - raddhn v28.8b, v11.8h, v8.8h - raddhn v29.8b, v12.8h, v9.8h - raddhn v30.8b, v13.8h, v10.8h -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head - urshr v11.8h, v8.8h, #8 - mov v30.8b, v31.8b - mov v31.8b, v3.8b - mov v3.8b, v30.8b - urshr v12.8h, v9.8h, #8 - urshr v13.8h, v10.8h, #8 - fetch_src_pixblock - raddhn v28.8b, v11.8h, v8.8h - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF beq 10f - PF add PF_X, PF_X, #8 - PF sub PF_CTL, PF_CTL, #1 -10: - raddhn v29.8b, v12.8h, v9.8h - raddhn v30.8b, v13.8h, v10.8h - umull v8.8h, v3.8b, v0.8b - umull v9.8h, v3.8b, v1.8b - umull v10.8h, v3.8b, v2.8b - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 - PF cmp PF_X, ORIG_W - PF lsl DUMMY, PF_X, src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] - PF ble 10f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 - PF ble 10f - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -10: -.endm - -generate_composite_function \ - pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_8_0565_process_pixblock_head - /* mask is in v15 */ - mov v4.d[0], v8.d[0] - mov v4.d[1], v9.d[0] - mov v13.d[0], v10.d[0] - mov v13.d[1], v11.d[0] - convert_0565_to_x888 v4, v2, v1, v0 - convert_0565_to_x888 v13, v6, v5, v4 - /* source pixel data is in {v0, v1, v2, XX} */ - /* destination pixel data is in {v4, v5, v6, XX} */ - mvn v7.8b, v15.8b - umull v10.8h, v15.8b, v2.8b - umull v9.8h, v15.8b, v1.8b - umull v8.8h, v15.8b, v0.8b - umull v11.8h, v7.8b, v4.8b - umull v12.8h, v7.8b, v5.8b - umull v13.8h, v7.8b, v6.8b - urshr v19.8h, v10.8h, #8 - urshr v18.8h, v9.8h, #8 - urshr v17.8h, v8.8h, #8 - raddhn v2.8b, v10.8h, v19.8h - raddhn v1.8b, v9.8h, v18.8h - raddhn v0.8b, v8.8h, v17.8h -.endm - -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail - urshr v17.8h, v11.8h, #8 - urshr v18.8h, v12.8h, #8 - urshr v19.8h, v13.8h, #8 - raddhn v28.8b, v17.8h, v11.8h - raddhn v29.8b, v18.8h, v12.8h - raddhn v30.8b, v19.8h, v13.8h - uqadd v0.8b, v0.8b, v28.8b - uqadd v1.8b, v1.8b, v29.8b - uqadd v2.8b, v2.8b, v30.8b - /* 32bpp result is in {v0, v1, v2, XX} */ - convert_8888_to_0565 v2, v1, v0, v14, v30, v13 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_over_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - ld1 {v10.4h, v11.4h}, [DST_R], #16 - cache_preload 8, 8 - pixman_composite_over_0565_8_0565_process_pixblock_head - st1 {v14.8h}, [DST_W], #16 -.endm - -generate_composite_function \ - pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_n_0565_init - mov v15.s[0], w6 - dup v15.8b, v15.b[3] -.endm - -.macro pixman_composite_over_0565_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_0565_n_0565_init, \ - pixman_composite_over_0565_n_0565_cleanup, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_0565_8_0565_process_pixblock_head - /* mask is in v15 */ - mov v4.d[0], v8.d[0] - mov v4.d[1], v9.d[0] - mov v13.d[0], v10.d[0] - mov v13.d[1], v11.d[0] - convert_0565_to_x888 v4, v2, v1, v0 - convert_0565_to_x888 v13, v6, v5, v4 - /* source pixel data is in {v0, v1, v2, XX} */ - /* destination pixel data is in {v4, v5, v6, XX} */ - umull v9.8h, v15.8b, v2.8b - umull v8.8h, v15.8b, v1.8b - umull v7.8h, v15.8b, v0.8b - urshr v12.8h, v9.8h, #8 - urshr v11.8h, v8.8h, #8 - urshr v10.8h, v7.8h, #8 - raddhn v2.8b, v9.8h, v12.8h - raddhn v1.8b, v8.8h, v11.8h - raddhn v0.8b, v7.8h, v10.8h -.endm - -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail - uqadd v0.8b, v0.8b, v4.8b - uqadd v1.8b, v1.8b, v5.8b - uqadd v2.8b, v2.8b, v6.8b - /* 32bpp result is in {v0, v1, v2, XX} */ - convert_8888_to_0565 v2, v1, v0, v14, v30, v13 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_add_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - ld1 {v10.4h, v11.4h}, [DST_R], #16 - cache_preload 8, 8 - pixman_composite_add_0565_8_0565_process_pixblock_head - st1 {v14.8h}, [DST_W], #16 -.endm - -generate_composite_function \ - pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_add_0565_8_0565_process_pixblock_head, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_head - /* mask is in v15 */ - mov v12.d[0], v10.d[0] - mov v12.d[1], v11.d[0] - convert_0565_to_x888 v12, v6, v5, v4 - /* destination pixel data is in {v4, v5, v6, xx} */ - mvn v24.8b, v15.8b /* get inverted alpha */ - /* now do alpha blending */ - umull v8.8h, v24.8b, v4.8b - umull v9.8h, v24.8b, v5.8b - umull v10.8h, v24.8b, v6.8b -.endm - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail - urshr v11.8h, v8.8h, #8 - urshr v12.8h, v9.8h, #8 - urshr v13.8h, v10.8h, #8 - raddhn v0.8b, v11.8h, v8.8h - raddhn v1.8b, v12.8h, v9.8h - raddhn v2.8b, v13.8h, v10.8h - /* 32bpp result is in {v0, v1, v2, XX} */ - convert_8888_to_0565 v2, v1, v0, v14, v12, v3 - mov v28.d[0], v14.d[0] - mov v29.d[0], v14.d[1] -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_0565_process_pixblock_tail - ld1 {v10.4h, v11.4h}, [DST_R], #16 - cache_preload 8, 8 - pixman_composite_out_reverse_8_0565_process_pixblock_head - st1 {v14.8h}, [DST_W], #16 -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8_0565_process_pixblock_head, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 15, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_8888_process_pixblock_head - /* src is in v0 */ - /* destination pixel data is in {v4, v5, v6, v7} */ - mvn v1.8b, v0.8b /* get inverted alpha */ - /* now do alpha blending */ - umull v8.8h, v1.8b, v4.8b - umull v9.8h, v1.8b, v5.8b - umull v10.8h, v1.8b, v6.8b - umull v11.8h, v1.8b, v7.8b -.endm - -.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail - urshr v14.8h, v8.8h, #8 - urshr v15.8h, v9.8h, #8 - urshr v12.8h, v10.8h, #8 - urshr v13.8h, v11.8h, #8 - raddhn v28.8b, v14.8h, v8.8h - raddhn v29.8b, v15.8h, v9.8h - raddhn v30.8b, v12.8h, v10.8h - raddhn v31.8b, v13.8h, v11.8h - /* 32bpp result is in {v28, v29, v30, v31} */ -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_8888_process_pixblock_tail - ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32 - cache_preload 8, 8 - pixman_composite_out_reverse_8_8888_process_pixblock_head - st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -/* - * Bilinear scaling support code which tries to provide pixel fetching, color - * format conversion, and interpolation as separate macros which can be used - * as the basic building blocks for constructing bilinear scanline functions. - */ - -.macro bilinear_load_8888 reg1, reg2, tmp - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - ld1 {®1&.2s}, [TMP1], STRIDE - ld1 {®2&.2s}, [TMP1] -.endm - -.macro bilinear_load_0565 reg1, reg2, tmp - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - ld1 {®2&.s}[0], [TMP1], STRIDE - ld1 {®2&.s}[1], [TMP1] - convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp -.endm - -.macro bilinear_load_and_vertical_interpolate_two_8888 \ - acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 - - bilinear_load_8888 reg1, reg2, tmp1 - umull &acc1&.8h, ®1&.8b, v28.8b - umlal &acc1&.8h, ®2&.8b, v29.8b - bilinear_load_8888 reg3, reg4, tmp2 - umull &acc2&.8h, ®3&.8b, v28.8b - umlal &acc2&.8h, ®4&.8b, v29.8b -.endm - -.macro bilinear_load_and_vertical_interpolate_four_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - - bilinear_load_and_vertical_interpolate_two_8888 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi - bilinear_load_and_vertical_interpolate_two_8888 \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi -.endm - -.macro vzip reg1, reg2 - umov TMP4, v31.d[0] - zip1 v31.8b, reg1, reg2 - zip2 reg2, reg1, reg2 - mov reg1, v31.8b - mov v31.d[0], TMP4 -.endm - -.macro vuzp reg1, reg2 - umov TMP4, v31.d[0] - uzp1 v31.8b, reg1, reg2 - uzp2 reg2, reg1, reg2 - mov reg1, v31.8b - mov v31.d[0], TMP4 -.endm - -.macro bilinear_load_and_vertical_interpolate_two_0565 \ - acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr TMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&acc2&.s}[0], [TMP1], STRIDE - ld1 {&acc2&.s}[2], [TMP2], STRIDE - ld1 {&acc2&.s}[1], [TMP1] - ld1 {&acc2&.s}[3], [TMP2] - convert_0565_to_x888 acc2, reg3, reg2, reg1 - vzip ®1&.8b, ®3&.8b - vzip ®2&.8b, ®4&.8b - vzip ®3&.8b, ®4&.8b - vzip ®1&.8b, ®2&.8b - umull &acc1&.8h, ®1&.8b, v28.8b - umlal &acc1&.8h, ®2&.8b, v29.8b - umull &acc2&.8h, ®3&.8b, v28.8b - umlal &acc2&.8h, ®4&.8b, v29.8b -.endm - -.macro bilinear_load_and_vertical_interpolate_four_0565 \ - xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ - yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr TMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&xacc2&.s}[0], [TMP1], STRIDE - ld1 {&xacc2&.s}[2], [TMP2], STRIDE - ld1 {&xacc2&.s}[1], [TMP1] - ld1 {&xacc2&.s}[3], [TMP2] - convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #1 - asr TMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #1 - ld1 {&yacc2&.s}[0], [TMP1], STRIDE - vzip &xreg1&.8b, &xreg3&.8b - ld1 {&yacc2&.s}[2], [TMP2], STRIDE - vzip &xreg2&.8b, &xreg4&.8b - ld1 {&yacc2&.s}[1], [TMP1] - vzip &xreg3&.8b, &xreg4&.8b - ld1 {&yacc2&.s}[3], [TMP2] - vzip &xreg1&.8b, &xreg2&.8b - convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 - umull &xacc1&.8h, &xreg1&.8b, v28.8b - vzip &yreg1&.8b, &yreg3&.8b - umlal &xacc1&.8h, &xreg2&.8b, v29.8b - vzip &yreg2&.8b, &yreg4&.8b - umull &xacc2&.8h, &xreg3&.8b, v28.8b - vzip &yreg3&.8b, &yreg4&.8b - umlal &xacc2&.8h, &xreg4&.8b, v29.8b - vzip &yreg1&.8b, &yreg2&.8b - umull &yacc1&.8h, &yreg1&.8b, v28.8b - umlal &yacc1&.8h, &yreg2&.8b, v29.8b - umull &yacc2&.8h, &yreg3&.8b, v28.8b - umlal &yacc2&.8h, &yreg4&.8b, v29.8b -.endm - -.macro bilinear_store_8888 numpix, tmp1, tmp2 -.if numpix == 4 - st1 {v0.2s, v1.2s}, [OUT], #16 -.elseif numpix == 2 - st1 {v0.2s}, [OUT], #8 -.elseif numpix == 1 - st1 {v0.s}[0], [OUT], #4 -.else - .error bilinear_store_8888 numpix is unsupported -.endif -.endm - -.macro bilinear_store_0565 numpix, tmp1, tmp2 - vuzp v0.8b, v1.8b - vuzp v2.8b, v3.8b - vuzp v1.8b, v3.8b - vuzp v0.8b, v2.8b - convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2 -.if numpix == 4 - st1 {v1.4h}, [OUT], #8 -.elseif numpix == 2 - st1 {v1.s}[0], [OUT], #4 -.elseif numpix == 1 - st1 {v1.h}[0], [OUT], #2 -.else - .error bilinear_store_0565 numpix is unsupported -.endif -.endm - -.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt - bilinear_load_&src_fmt v0, v1, v2 - umull v2.8h, v0.8b, v28.8b - umlal v2.8h, v1.8b, v29.8b - /* 5 cycles bubble */ - ushll v0.4s, v2.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v2.4h, v15.h[0] - umlal2 v0.4s, v2.8h, v15.h[0] - /* 5 cycles bubble */ - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - /* 3 cycles bubble */ - xtn v0.8b, v0.8h - /* 1 cycle bubble */ - bilinear_store_&dst_fmt 1, v3, v4 -.endm - -.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt - bilinear_load_and_vertical_interpolate_two_&src_fmt \ - v1, v11, v2, v3, v20, v21, v22, v23 - ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v1.4h, v15.h[0] - umlal2 v0.4s, v1.8h, v15.h[0] - ushll v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v10.4s, v11.4h, v15.h[4] - umlal2 v10.4s, v11.8h, v15.h[4] - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - xtn v0.8b, v0.8h - bilinear_store_&dst_fmt 2, v3, v4 -.endm - -.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt - bilinear_load_and_vertical_interpolate_four_&src_fmt \ - v1, v11, v14, v20, v16, v17, v22, v23 \ - v3, v9, v24, v25, v26, v27, v18, v19 - prfm PREFETCH_MODE, [TMP1, PF_OFFS] - sub TMP1, TMP1, STRIDE - ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v1.4h, v15.h[0] - umlal2 v0.4s, v1.8h, v15.h[0] - ushll v10.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v10.4s, v11.4h, v15.h[4] - umlal2 v10.4s, v11.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ushll v2.4s, v3.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v2.4s, v3.4h, v15.h[0] - umlal2 v2.4s, v3.8h, v15.h[0] - ushll v8.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - prfm PREFETCH_MODE, [TMP2, PF_OFFS] - umlsl v8.4s, v9.4h, v15.h[4] - umlal2 v8.4s, v9.8h, v15.h[4] - add v12.8h, v12.8h, v13.8h - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v2.8h, v8.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - xtn v0.8b, v0.8h - xtn v1.8b, v2.8h - add v12.8h, v12.8h, v13.8h - bilinear_store_&dst_fmt 4, v3, v4 -.endm - -.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head -.else - bilinear_interpolate_four_pixels src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail -.endif -.endm - -.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head -.else - bilinear_interpolate_four_pixels src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head -.else - bilinear_interpolate_four_pixels_head src_fmt, dst_fmt - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail -.else - bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -.endif -.endm - -.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt -.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt - bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head -.else - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt -.endif -.endm - -.set BILINEAR_FLAG_UNROLL_4, 0 -.set BILINEAR_FLAG_UNROLL_8, 1 -.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 - -/* - * Main template macro for generating NEON optimized bilinear scanline - * functions. - * - * Bilinear scanline scaler macro template uses the following arguments: - * fname - name of the function to generate - * src_fmt - source color format (8888 or 0565) - * dst_fmt - destination color format (8888 or 0565) - * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes - * prefetch_distance - prefetch in the source image by that many - * pixels ahead - */ - -.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \ - src_bpp_shift, dst_bpp_shift, \ - prefetch_distance, flags - -pixman_asm_function fname - OUT .req x0 - TOP .req x1 - BOTTOM .req x2 - WT .req x3 - WB .req x4 - X .req x5 - UX .req x6 - WIDTH .req x7 - TMP1 .req x8 - TMP2 .req x9 - PF_OFFS .req x10 - TMP3 .req x11 - TMP4 .req x12 - STRIDE .req x13 - - sxtw x3, w3 - sxtw x4, w4 - sxtw x5, w5 - sxtw x6, w6 - sxtw x7, w7 - - stp x29, x30, [sp, -16]! - mov x29, sp - sub sp, sp, 112 /* push all registers */ - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32 - stp x8, x9, [x29, -80] - stp x10, x11, [x29, -96] - stp x12, x13, [x29, -112] - - mov PF_OFFS, #prefetch_distance - mul PF_OFFS, PF_OFFS, UX - - subs STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 300f - - dup v12.8h, w5 - dup v13.8h, w6 - dup v28.8b, w3 - dup v29.8b, w4 - mov v25.d[0], v12.d[1] - mov v26.d[0], v13.d[0] - add v25.4h, v25.4h, v26.4h - mov v12.d[1], v25.d[0] - - /* ensure good destination alignment */ - cmp WIDTH, #1 - blt 100f - tst OUT, #(1 << dst_bpp_shift) - beq 100f - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - bilinear_interpolate_last_pixel src_fmt, dst_fmt - sub WIDTH, WIDTH, #1 -100: - add v13.8h, v13.8h, v13.8h - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - - cmp WIDTH, #2 - blt 100f - tst OUT, #(1 << (dst_bpp_shift + 1)) - beq 100f - bilinear_interpolate_two_pixels src_fmt, dst_fmt - sub WIDTH, WIDTH, #2 -100: -.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0 -/*********** 8 pixels per iteration *****************/ - cmp WIDTH, #4 - blt 100f - tst OUT, #(1 << (dst_bpp_shift + 2)) - beq 100f - bilinear_interpolate_four_pixels src_fmt, dst_fmt - sub WIDTH, WIDTH, #4 -100: - subs WIDTH, WIDTH, #8 - blt 100f - asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift) - bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #8 - blt 500f -1000: - bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #8 - bge 1000b -500: - bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt -100: - tst WIDTH, #4 - beq 200f - bilinear_interpolate_four_pixels src_fmt, dst_fmt -200: -.else -/*********** 4 pixels per iteration *****************/ - subs WIDTH, WIDTH, #4 - blt 100f - asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift) - bilinear_interpolate_four_pixels_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #4 - blt 500f -1000: - bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt - subs WIDTH, WIDTH, #4 - bge 1000b -500: - bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt -100: -/****************************************************/ -.endif - /* handle the remaining trailing pixels */ - tst WIDTH, #2 - beq 200f - bilinear_interpolate_two_pixels src_fmt, dst_fmt -200: - tst WIDTH, #1 - beq 300f - bilinear_interpolate_last_pixel src_fmt, dst_fmt -300: - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32 - ldp x8, x9, [x29, -80] - ldp x10, x11, [x29, -96] - ldp x12, x13, [x29, -104] - mov sp, x29 - ldp x29, x30, [sp], 16 - ret - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.endfunc - -.endm - -/*****************************************************************************/ - -.set have_bilinear_interpolate_four_pixels_8888_8888, 1 - -.macro bilinear_interpolate_four_pixels_8888_8888_head - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - asr TMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #2 - - ld1 {v22.2s}, [TMP1], STRIDE - ld1 {v23.2s}, [TMP1] - asr TMP3, X, #16 - add X, X, UX - add TMP3, TOP, TMP3, lsl #2 - umull v8.8h, v22.8b, v28.8b - umlal v8.8h, v23.8b, v29.8b - - ld1 {v22.2s}, [TMP2], STRIDE - ld1 {v23.2s}, [TMP2] - asr TMP4, X, #16 - add X, X, UX - add TMP4, TOP, TMP4, lsl #2 - umull v9.8h, v22.8b, v28.8b - umlal v9.8h, v23.8b, v29.8b - - ld1 {v22.2s}, [TMP3], STRIDE - ld1 {v23.2s}, [TMP3] - umull v10.8h, v22.8b, v28.8b - umlal v10.8h, v23.8b, v29.8b - - ushll v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v0.4s, v8.4h, v15.h[0] - umlal2 v0.4s, v8.8h, v15.h[0] - - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - ld1 {v16.2s}, [TMP4], STRIDE - ld1 {v17.2s}, [TMP4] - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - umull v11.8h, v16.8b, v28.8b - umlal v11.8h, v17.8b, v29.8b - - ushll v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v1.4s, v9.4h, v15.h[4] -.endm - -.macro bilinear_interpolate_four_pixels_8888_8888_tail - umlal2 v1.4s, v9.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ushll v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v2.4s, v10.4h, v15.h[0] - umlal2 v2.4s, v10.8h, v15.h[0] - ushll v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v3.4s, v11.4h, v15.h[4] - umlal2 v3.4s, v11.8h, v15.h[4] - add v12.8h, v12.8h, v13.8h - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - shrn2 v2.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - xtn v6.8b, v0.8h - xtn v7.8b, v2.8h - add v12.8h, v12.8h, v13.8h - st1 {v6.2s, v7.2s}, [OUT], #16 -.endm - -.macro bilinear_interpolate_four_pixels_8888_8888_tail_head - asr TMP1, X, #16 - add X, X, UX - add TMP1, TOP, TMP1, lsl #2 - asr TMP2, X, #16 - add X, X, UX - add TMP2, TOP, TMP2, lsl #2 - umlal2 v1.4s, v9.8h, v15.h[4] - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ushll v2.4s, v10.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v2.4s, v10.4h, v15.h[0] - umlal2 v2.4s, v10.8h, v15.h[0] - ushll v3.4s, v11.4h, #BILINEAR_INTERPOLATION_BITS - ld1 {v20.2s}, [TMP1], STRIDE - umlsl v3.4s, v11.4h, v15.h[4] - umlal2 v3.4s, v11.8h, v15.h[4] - ld1 {v21.2s}, [TMP1] - umull v8.8h, v20.8b, v28.8b - umlal v8.8h, v21.8b, v29.8b - shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn2 v0.8h, v1.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - shrn v4.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - ld1 {v22.2s}, [TMP2], STRIDE - shrn2 v4.8h, v3.4s, #(2 * BILINEAR_INTERPOLATION_BITS) - add v12.8h, v12.8h, v13.8h - ld1 {v23.2s}, [TMP2] - umull v9.8h, v22.8b, v28.8b - asr TMP3, X, #16 - add X, X, UX - add TMP3, TOP, TMP3, lsl #2 - asr TMP4, X, #16 - add X, X, UX - add TMP4, TOP, TMP4, lsl #2 - umlal v9.8h, v23.8b, v29.8b - ld1 {v22.2s}, [TMP3], STRIDE - ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS) - ld1 {v23.2s}, [TMP3] - umull v10.8h, v22.8b, v28.8b - umlal v10.8h, v23.8b, v29.8b - xtn v6.8b, v0.8h - ushll v0.4s, v8.4h, #BILINEAR_INTERPOLATION_BITS - xtn v7.8b, v4.8h - umlsl v0.4s, v8.4h, v15.h[0] - umlal2 v0.4s, v8.8h, v15.h[0] - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - ld1 {v16.2s}, [TMP4], STRIDE - add v12.8h, v12.8h, v13.8h - ld1 {v17.2s}, [TMP4] - prfm PREFETCH_MODE, [TMP4, PF_OFFS] - umull v11.8h, v16.8b, v28.8b - umlal v11.8h, v17.8b, v29.8b - st1 {v6.2s, v7.2s}, [OUT], #16 - ushll v1.4s, v9.4h, #BILINEAR_INTERPOLATION_BITS - umlsl v1.4s, v9.4h, v15.h[4] -.endm - -/*****************************************************************************/ - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \ - 2, 2, 28, BILINEAR_FLAG_UNROLL_4 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \ - 2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \ - 1, 2, 28, BILINEAR_FLAG_UNROLL_4 - -generate_bilinear_scanline_func \ - pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \ - 1, 1, 28, BILINEAR_FLAG_UNROLL_4 diff --git a/vendor/pixman/pixman/pixman-arma64-neon-asm.h b/vendor/pixman/pixman/pixman-arma64-neon-asm.h deleted file mode 100644 index 5d9317217..000000000 --- a/vendor/pixman/pixman/pixman-arma64-neon-asm.h +++ /dev/null @@ -1,1310 +0,0 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains a macro ('generate_composite_function') which can - * construct 2D image processing functions, based on a common template. - * Any combinations of source, destination and mask images with 8bpp, - * 16bpp, 24bpp, 32bpp color formats are supported. - * - * This macro takes care of: - * - handling of leading and trailing unaligned pixels - * - doing most of the work related to L2 cache preload - * - encourages the use of software pipelining for better instructions - * scheduling - * - * The user of this macro has to provide some configuration parameters - * (bit depths for the images, prefetch distance, etc.) and a set of - * macros, which should implement basic code chunks responsible for - * pixels processing. See 'pixman-armv8-neon-asm.S' file for the usage - * examples. - * - * TODO: - * - try overlapped pixel method (from Ian Rickards) when processing - * exactly two blocks of pixels - * - maybe add an option to do reverse scanline processing - */ - -/* - * Bit flags for 'generate_composite_function' macro which are used - * to tune generated functions behavior. - */ -.set FLAG_DST_WRITEONLY, 0 -.set FLAG_DST_READWRITE, 1 -.set FLAG_DEINTERLEAVE_32BPP, 2 - -/* - * Constants for selecting preferable prefetch type. - */ -.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ -.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ -.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ - -/* - * prefetch mode - * available modes are: - * pldl1keep - * pldl1strm - * pldl2keep - * pldl2strm - * pldl3keep - * pldl3strm - */ -#define PREFETCH_MODE pldl1keep - -/* - * Definitions of supplementary pixld/pixst macros (for partial load/store of - * pixel data). - */ - -.macro pixldst1 op, elem_size, reg1, mem_operand, abits - op {v®1&.&elem_size}, [&mem_operand&], #8 -.endm - -.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits - op {v®1&.&elem_size, v®2&.&elem_size}, [&mem_operand&], #16 -.endm - -.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits - op {v®1&.&elem_size, v®2&.&elem_size, v®3&.&elem_size, v®4&.&elem_size}, [&mem_operand&], #32 -.endm - -.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits, bytes - op {v®1&.&elem_size}[idx], [&mem_operand&], #&bytes& -.endm - -.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand - op {v®1&.&elem_size, v®2&.&elem_size, v®3&.&elem_size}, [&mem_operand&], #24 -.endm - -.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand - op {v®1&.&elem_size, v®2&.&elem_size, v®3&.&elem_size}[idx], [&mem_operand&], #3 -.endm - -.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits -.if numbytes == 32 - .if elem_size==32 - pixldst4 op, 2s, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits - .elseif elem_size==16 - pixldst4 op, 4h, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits - .else - pixldst4 op, 8b, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits - .endif -.elseif numbytes == 16 - .if elem_size==32 - pixldst2 op, 2s, %(basereg+2), %(basereg+3), mem_operand, abits - .elseif elem_size==16 - pixldst2 op, 4h, %(basereg+2), %(basereg+3), mem_operand, abits - .else - pixldst2 op, 8b, %(basereg+2), %(basereg+3), mem_operand, abits - .endif -.elseif numbytes == 8 - .if elem_size==32 - pixldst1 op, 2s, %(basereg+1), mem_operand, abits - .elseif elem_size==16 - pixldst1 op, 4h, %(basereg+1), mem_operand, abits - .else - pixldst1 op, 8b, %(basereg+1), mem_operand, abits - .endif -.elseif numbytes == 4 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) - pixldst0 op, s, %(basereg+0), 1, mem_operand, abits, 4 - .elseif elem_size == 16 - pixldst0 op, h, %(basereg+0), 2, mem_operand, abits, 2 - pixldst0 op, h, %(basereg+0), 3, mem_operand, abits, 2 - .else - pixldst0 op, b, %(basereg+0), 4, mem_operand, abits, 1 - pixldst0 op, b, %(basereg+0), 5, mem_operand, abits, 1 - pixldst0 op, b, %(basereg+0), 6, mem_operand, abits, 1 - pixldst0 op, b, %(basereg+0), 7, mem_operand, abits, 1 - .endif -.elseif numbytes == 2 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) - pixldst0 op, h, %(basereg+0), 1, mem_operand, abits, 2 - .else - pixldst0 op, b, %(basereg+0), 2, mem_operand, abits, 1 - pixldst0 op, b, %(basereg+0), 3, mem_operand, abits, 1 - .endif -.elseif numbytes == 1 - pixldst0 op, b, %(basereg+0), 1, mem_operand, abits, 1 -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 ld4, 8b, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 ld3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), ld1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixst numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 st4, 8b, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 st3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.elseif numpix * bpp == 32 && abits == 32 - pixldst 4, st1, 32, basereg, mem_operand, abits -.elseif numpix * bpp == 16 && abits == 16 - pixldst 2, st1, 16, basereg, mem_operand, abits -.else - pixldst %(numpix * bpp / 8), st1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixld_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixld numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -.macro pixst_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixst numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -/* - * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register - * aliases to be defined) - */ -.macro pixld1_s elem_size, reg1, mem_operand -.if elem_size == 16 - asr TMP1, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP1, mem_operand, TMP1, lsl #1 - asr TMP2, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP2, mem_operand, TMP2, lsl #1 - ld1 {v®1&.h}[0], [TMP1] - asr TMP1, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP1, mem_operand, TMP1, lsl #1 - ld1 {v®1&.h}[1], [TMP2] - asr TMP2, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP2, mem_operand, TMP2, lsl #1 - ld1 {v®1&.h}[2], [TMP1] - ld1 {v®1&.h}[3], [TMP2] -.elseif elem_size == 32 - asr TMP1, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP1, mem_operand, TMP1, lsl #2 - asr TMP2, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP2, mem_operand, TMP2, lsl #2 - ld1 {v®1&.s}[0], [TMP1] - ld1 {v®1&.s}[1], [TMP2] -.else - .error "unsupported" -.endif -.endm - -.macro pixld2_s elem_size, reg1, reg2, mem_operand -.if 0 /* elem_size == 32 */ - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - sub VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - ld1 {v®1&.s}[0], [TMP1] - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - ld1 {v®2&.s}[0], [TMP2, :32] - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - ld1 {v®1&.s}[1], [TMP1] - ld1 {v®2&.s}[1], [TMP2] -.else - pixld1_s elem_size, reg1, mem_operand - pixld1_s elem_size, reg2, mem_operand -.endif -.endm - -.macro pixld0_s elem_size, reg1, idx, mem_operand -.if elem_size == 16 - asr TMP1, VX, #16 - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP1, mem_operand, TMP1, lsl #1 - ld1 {v®1&.h}[idx], [TMP1] -.elseif elem_size == 32 - asr DUMMY, VX, #16 - mov TMP1, DUMMY - adds VX, VX, UNIT_X - bmi 55f -5: subs VX, VX, SRC_WIDTH_FIXED - bpl 5b -55: - add TMP1, mem_operand, TMP1, lsl #2 - ld1 {v®1&.s}[idx], [TMP1] -.endif -.endm - -.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand -.if numbytes == 32 - pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand - pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand - pixdeinterleave elem_size, %(basereg+4) -.elseif numbytes == 16 - pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand -.elseif numbytes == 8 - pixld1_s elem_size, %(basereg+1), mem_operand -.elseif numbytes == 4 - .if elem_size == 32 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .elseif elem_size == 16 - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 4, mem_operand - pixld0_s elem_size, %(basereg+0), 5, mem_operand - pixld0_s elem_size, %(basereg+0), 6, mem_operand - pixld0_s elem_size, %(basereg+0), 7, mem_operand - .endif -.elseif numbytes == 2 - .if elem_size == 16 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .endif -.elseif numbytes == 1 - pixld0_s elem_size, %(basereg+0), 1, mem_operand -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld_s numpix, bpp, basereg, mem_operand -.if bpp > 0 - pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand -.endif -.endm - -.macro vuzp8 reg1, reg2 - umov DUMMY, v16.d[0] - uzp1 v16.8b, v®1&.8b, v®2&.8b - uzp2 v®2&.8b, v®1&.8b, v®2&.8b - mov v®1&.8b, v16.8b - mov v16.d[0], DUMMY -.endm - -.macro vzip8 reg1, reg2 - umov DUMMY, v16.d[0] - zip1 v16.8b, v®1&.8b, v®2&.8b - zip2 v®2&.8b, v®1&.8b, v®2&.8b - mov v®1&.8b, v16.8b - mov v16.d[0], DUMMY -.endm - -/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixdeinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vuzp8 %(basereg+0), %(basereg+1) - vuzp8 %(basereg+2), %(basereg+3) - vuzp8 %(basereg+1), %(basereg+3) - vuzp8 %(basereg+0), %(basereg+2) -.endif -.endm - -/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vzip8 %(basereg+0), %(basereg+2) - vzip8 %(basereg+1), %(basereg+3) - vzip8 %(basereg+2), %(basereg+3) - vzip8 %(basereg+0), %(basereg+1) -.endif -.endm - -/* - * This is a macro for implementing cache preload. The main idea is that - * cache preload logic is mostly independent from the rest of pixels - * processing code. It starts at the top left pixel and moves forward - * across pixels and can jump across scanlines. Prefetch distance is - * handled in an 'incremental' way: it starts from 0 and advances to the - * optimal distance over time. After reaching optimal prefetch distance, - * it is kept constant. There are some checks which prevent prefetching - * unneeded pixel lines below the image (but it still can prefetch a bit - * more data on the right side of the image - not a big issue and may - * be actually helpful when rendering text glyphs). Additional trick is - * the use of LDR instruction for prefetch instead of PLD when moving to - * the next line, the point is that we have a high chance of getting TLB - * miss in this case, and PLD would be useless. - * - * This sounds like it may introduce a noticeable overhead (when working with - * fully cached data). But in reality, due to having a separate pipeline and - * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can - * execute simultaneously with NEON and be completely shadowed by it. Thus - * we get no performance overhead at all (*). This looks like a very nice - * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in software - * for almost zero cost! - * - * (*) The overhead of the prefetcher is visible when running some trivial - * pixels processing like simple copy. Anyway, having prefetch is a must - * when working with the graphics data. - */ -.macro PF a, x:vararg -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) - a x -.endif -.endm - -.macro cache_preload std_increment, boost_increment -.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) -.if std_increment != 0 - PF add PF_X, PF_X, #std_increment -.endif - PF tst PF_CTL, #0xF - PF beq 71f - PF add PF_X, PF_X, #boost_increment - PF sub PF_CTL, PF_CTL, #1 -71: - PF cmp PF_X, ORIG_W -.if src_bpp_shift >= 0 - PF lsl DUMMY, PF_X, #src_bpp_shift - PF prfm PREFETCH_MODE, [PF_SRC, DUMMY] -.endif -.if dst_r_bpp != 0 - PF lsl DUMMY, PF_X, #dst_bpp_shift - PF prfm PREFETCH_MODE, [PF_DST, DUMMY] -.endif -.if mask_bpp_shift >= 0 - PF lsl DUMMY, PF_X, #mask_bpp_shift - PF prfm PREFETCH_MODE, [PF_MASK, DUMMY] -.endif - PF ble 71f - PF sub PF_X, PF_X, ORIG_W - PF subs PF_CTL, PF_CTL, #0x10 -71: - PF ble 72f -.if src_bpp_shift >= 0 - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb DUMMY, [PF_SRC, DUMMY] - PF add PF_SRC, PF_SRC, #1 -.endif -.if dst_r_bpp != 0 - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb DUMMY, [PF_DST, DUMMY] - PF add PF_DST, PF_DST, #1 -.endif -.if mask_bpp_shift >= 0 - PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb DUMMY, [PF_MASK, DUMMY] - PF add PF_MASK, PF_MASK, #1 -.endif -72: -.endif -.endm - -.macro cache_preload_simple -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) -.if src_bpp > 0 - prfm PREFETCH_MODE, [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] -.endif -.if dst_r_bpp > 0 - prfm PREFETCH_MODE, [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] -.endif -.if mask_bpp > 0 - prfm PREFETCH_MODE, [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] -.endif -.endif -.endm - -.macro fetch_mask_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK -.endm - -/* - * Macro which is used to process leading pixels until destination - * pointer is properly aligned (at 16 bytes boundary). When destination - * buffer uses 16bpp format, this is unnecessary, or even pointless. - */ -.macro ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head -.if dst_w_bpp != 24 - tst DST_R, #0xF - beq 52f - -.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0 -.irp lowbit, 1, 2, 4, 8, 16 -local skip1 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_R, #lowbit - beq 51f -.endif - pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC - pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK -.if dst_r_bpp > 0 - pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R -.else - add DST_R, DST_R, #lowbit -.endif - PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) - sub W, W, #(lowbit * 8 / dst_w_bpp) -51: -.endif -.endr -.endif - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - process_pixblock_tail - - pixinterleave dst_w_bpp, dst_w_basereg - -.irp lowbit, 1, 2, 4, 8, 16 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_W, #lowbit - beq 51f -.endif -.if src_bpp == 0 && mask_bpp == 0 && dst_r_bpp == 0 - sub W, W, #(lowbit * 8 / dst_w_bpp) -.endif - pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W -51: -.endif -.endr -.endif -52: -.endm - -/* - * Special code for processing up to (pixblock_size - 1) remaining - * trailing pixels. As SIMD processing performs operation on - * pixblock_size pixels, anything smaller than this has to be loaded - * and stored in a special way. Loading and storing of pixel data is - * performed in such a way that we fill some 'slots' in the NEON - * registers (some slots naturally are unused), then perform compositing - * operation as usual. In the end, the data is taken from these 'slots' - * and saved to memory. - * - * cache_preload_flag - allows to suppress prefetch if - * set to 0 - * dst_aligned_flag - selects whether destination buffer - * is aligned - */ -.macro process_trailing_pixels cache_preload_flag, \ - dst_aligned_flag, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - tst W, #(pixblock_size - 1) - beq 52f -.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0 -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 51f - pixld_src chunk_size, src_bpp, src_basereg, SRC - pixld chunk_size, mask_bpp, mask_basereg, MASK -.if dst_aligned_flag != 0 - pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.else - pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.endif -.if cache_preload_flag != 0 - PF add PF_X, PF_X, #chunk_size -.endif -51: -.endif -.endr -.endif - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head -.if cache_preload_flag != 0 - cache_preload 0, pixblock_size - cache_preload_simple -.endif - process_pixblock_tail - pixinterleave dst_w_bpp, dst_w_basereg -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 51f -.if dst_aligned_flag != 0 - pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.else - pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.endif -51: -.endif -.endr -52: -.endm - -/* - * Macro, which performs all the needed operations to switch to the next - * scanline and start the next loop iteration unless all the scanlines - * are already processed. - */ -.macro advance_to_next_scanline start_of_loop_label - mov W, ORIG_W - add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift -.if src_bpp != 0 - add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift -.endif -.if mask_bpp != 0 - add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift -.endif -.if (dst_w_bpp != 24) - sub DST_W, DST_W, W, lsl #dst_bpp_shift -.endif -.if (src_bpp != 24) && (src_bpp != 0) - sub SRC, SRC, W, lsl #src_bpp_shift -.endif -.if (mask_bpp != 24) && (mask_bpp != 0) - sub MASK, MASK, W, lsl #mask_bpp_shift -.endif - subs H, H, #1 - mov DST_R, DST_W - bge start_of_loop_label -.endm - -/* - * Registers are allocated in the following way by default: - * v0, v1, v2, v3 - reserved for loading source pixel data - * v4, v5, v6, v7 - reserved for loading destination pixel data - * v24, v25, v26, v27 - reserved for loading mask pixel data - * v28, v29, v30, v31 - final destination pixel data for writeback to memory - */ -.macro generate_composite_function fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - prefetch_distance, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - stp x29, x30, [sp, -16]! - mov x29, sp - sub sp, sp, 232 /* push all registers */ - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], #32 - stp x8, x9, [x29, -80] - stp x10, x11, [x29, -96] - stp x12, x13, [x29, -112] - stp x14, x15, [x29, -128] - stp x16, x17, [x29, -144] - stp x18, x19, [x29, -160] - stp x20, x21, [x29, -176] - stp x22, x23, [x29, -192] - stp x24, x25, [x29, -208] - stp x26, x27, [x29, -224] - str x28, [x29, -232] - -/* - * Select prefetch type for this function. If prefetch distance is - * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch - * has to be used instead of ADVANCED. - */ - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT -.if prefetch_distance == 0 - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ - ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE -.endif - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - - .macro pixld_src x:vararg - pixld x - .endm - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm -/* - * Assign symbolic names to registers - */ - W .req x0 /* width (is updated during processing) */ - H .req x1 /* height (is updated during processing) */ - DST_W .req x2 /* destination buffer pointer for writes */ - DST_STRIDE .req x3 /* destination image stride */ - SRC .req x4 /* source buffer pointer */ - SRC_STRIDE .req x5 /* source image stride */ - MASK .req x6 /* mask pointer */ - MASK_STRIDE .req x7 /* mask stride */ - - DST_R .req x8 /* destination buffer pointer for reads */ - - PF_CTL .req x9 /* combined lines counter and prefetch */ - /* distance increment counter */ - PF_X .req x10 /* pixel index in a scanline for current */ - /* pretetch position */ - PF_SRC .req x11 /* pointer to source scanline start */ - /* for prefetch purposes */ - PF_DST .req x12 /* pointer to destination scanline start */ - /* for prefetch purposes */ - PF_MASK .req x13 /* pointer to mask scanline start */ - /* for prefetch purposes */ - - ORIG_W .req x14 /* saved original width */ - DUMMY .req x15 /* temporary register */ - - sxtw x0, w0 - sxtw x1, w1 - sxtw x3, w3 - sxtw x5, w5 - sxtw x7, w7 - - .set mask_bpp_shift, -1 -.if src_bpp == 32 - .set src_bpp_shift, 2 -.elseif src_bpp == 24 - .set src_bpp_shift, 0 -.elseif src_bpp == 16 - .set src_bpp_shift, 1 -.elseif src_bpp == 8 - .set src_bpp_shift, 0 -.elseif src_bpp == 0 - .set src_bpp_shift, -1 -.else - .error "requested src bpp (src_bpp) is not supported" -.endif -.if mask_bpp == 32 - .set mask_bpp_shift, 2 -.elseif mask_bpp == 24 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 8 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 0 - .set mask_bpp_shift, -1 -.else - .error "requested mask bpp (mask_bpp) is not supported" -.endif -.if dst_w_bpp == 32 - .set dst_bpp_shift, 2 -.elseif dst_w_bpp == 24 - .set dst_bpp_shift, 0 -.elseif dst_w_bpp == 16 - .set dst_bpp_shift, 1 -.elseif dst_w_bpp == 8 - .set dst_bpp_shift, 0 -.else - .error "requested dst bpp (dst_w_bpp) is not supported" -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - -.if prefetch_distance < 0 || prefetch_distance > 15 - .error "invalid prefetch distance (prefetch_distance)" -.endif - - PF mov PF_X, #0 - mov DST_R, DST_W - -.if src_bpp == 24 - sub SRC_STRIDE, SRC_STRIDE, W - sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 -.endif -.if mask_bpp == 24 - sub MASK_STRIDE, MASK_STRIDE, W - sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 -.endif -.if dst_w_bpp == 24 - sub DST_STRIDE, DST_STRIDE, W - sub DST_STRIDE, DST_STRIDE, W, lsl #1 -.endif - -/* - * Setup advanced prefetcher initial state - */ - PF mov PF_SRC, SRC - PF mov PF_DST, DST_R - PF mov PF_MASK, MASK - /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ - PF lsl DUMMY, H, #4 - PF mov PF_CTL, DUMMY - PF add PF_CTL, PF_CTL, #(prefetch_distance - 0x10) - - init - subs H, H, #1 - mov ORIG_W, W - blt 9f - cmp W, #(pixblock_size * 2) - blt 800f -/* - * This is the start of the pipelined loop, which if optimized for - * long scanlines - */ -0: - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - PF add PF_X, PF_X, #pixblock_size - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - subs W, W, #(pixblock_size * 2) - blt 200f - -100: - process_pixblock_tail_head - cache_preload_simple - subs W, W, #pixblock_size - bge 100b - -200: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W - - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 1, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 0b - - cleanup -1000: - /* pop all registers */ - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x8, x9, [x29, -80] - ldp x10, x11, [x29, -96] - ldp x12, x13, [x29, -112] - ldp x14, x15, [x29, -128] - ldp x16, x17, [x29, -144] - ldp x18, x19, [x29, -160] - ldp x20, x21, [x29, -176] - ldp x22, x23, [x29, -192] - ldp x24, x25, [x29, -208] - ldp x26, x27, [x29, -224] - ldr x28, [x29, -232] - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ -/* - * This is the start of the loop, designed to process images with small width - * (less than pixblock_size * 2 pixels). In this case neither pipelining - * nor prefetch are used. - */ -800: -.if src_bpp_shift >= 0 - PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift - PF prfm PREFETCH_MODE, [SRC, DUMMY] -.endif -.if dst_r_bpp != 0 - PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift - PF prfm PREFETCH_MODE, [DST_R, DUMMY] -.endif -.if mask_bpp_shift >= 0 - PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift - PF prfm PREFETCH_MODE, [MASK, DUMMY] -.endif - /* Process exactly pixblock_size pixels if needed */ - tst W, #pixblock_size - beq 100f - pixld pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - process_pixblock_tail - pixst pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -100: - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 800b -9: - cleanup - /* pop all registers */ - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x8, x9, [x29, -80] - ldp x10, x11, [x29, -96] - ldp x12, x13, [x29, -112] - ldp x14, x15, [x29, -128] - ldp x16, x17, [x29, -144] - ldp x18, x19, [x29, -160] - ldp x20, x21, [x29, -176] - ldp x22, x23, [x29, -192] - ldp x24, x25, [x29, -208] - ldp x26, x27, [x29, -224] - ldr x28, [x29, -232] - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ - - .purgem fetch_src_pixblock - .purgem pixld_src - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq ORIG_W - .unreq W - .unreq H - .unreq SRC_STRIDE - .unreq DST_STRIDE - .unreq MASK_STRIDE - .unreq PF_CTL - .unreq PF_X - .unreq PF_SRC - .unreq PF_DST - .unreq PF_MASK - .unreq DUMMY - .endfunc -.endm - -/* - * A simplified variant of function generation template for a single - * scanline processing (for implementing pixman combine functions) - */ -.macro generate_composite_function_scanline use_nearest_scaling, \ - fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - pixman_asm_function fname - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - -.if use_nearest_scaling != 0 - /* - * Assign symbolic names to registers for nearest scaling - */ - W .req x0 - DST_W .req x1 - SRC .req x2 - VX .req x3 - UNIT_X .req x4 - SRC_WIDTH_FIXED .req x5 - MASK .req x6 - TMP1 .req x8 - TMP2 .req x9 - DST_R .req x10 - DUMMY .req x30 - - .macro pixld_src x:vararg - pixld_s x - .endm - - sxtw x0, w0 - sxtw x3, w3 - sxtw x4, w4 - sxtw x5, w5 - - stp x29, x30, [sp, -16]! - mov x29, sp - sub sp, sp, 88 - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - stp x8, x9, [x29, -80] - str x10, [x29, -88] -.else - /* - * Assign symbolic names to registers - */ - W .req x0 /* width (is updated during processing) */ - DST_W .req x1 /* destination buffer pointer for writes */ - SRC .req x2 /* source buffer pointer */ - MASK .req x3 /* mask pointer */ - DST_R .req x4 /* destination buffer pointer for reads */ - DUMMY .req x30 - - .macro pixld_src x:vararg - pixld x - .endm - - sxtw x0, w0 - - stp x29, x30, [sp, -16]! - mov x29, sp - sub sp, sp, 64 - sub x29, x29, 64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm - - init - mov DST_R, DST_W - - cmp W, #pixblock_size - blt 800f - - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - subs W, W, #pixblock_size - blt 700f - - /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - subs W, W, #pixblock_size - blt 200f -100: - process_pixblock_tail_head - subs W, W, #pixblock_size - bge 100b -200: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -700: - /* Process the remaining trailing pixels in the scanline (dst aligned) */ - process_trailing_pixels 0, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup -.if use_nearest_scaling != 0 - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x8, x9, [x29, -80] - ldr x10, [x29, -96] - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ -.else - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ -.endif -800: - /* Process the remaining trailing pixels in the scanline (dst unaligned) */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup -.if use_nearest_scaling != 0 - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - ldp x8, x9, [x29, -80] - ldr x10, [x29, -88] - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ - - .unreq DUMMY - .unreq DST_R - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq DST_W - .unreq MASK - .unreq SRC_WIDTH_FIXED - -.else - sub x29, x29, 64 - ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32 - ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32 - mov sp, x29 - ldp x29, x30, [sp], 16 - ret /* exit */ - - .unreq DUMMY - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq W -.endif - - .purgem fetch_src_pixblock - .purgem pixld_src - - .endfunc -.endm - -.macro generate_composite_function_single_scanline x:vararg - generate_composite_function_scanline 0, x -.endm - -.macro generate_composite_function_nearest_scanline x:vararg - generate_composite_function_scanline 1, x -.endm - -/* Default prologue/epilogue, nothing special needs to be done */ - -.macro default_init -.endm - -.macro default_cleanup -.endm - -/* - * Prologue/epilogue variant which additionally saves/restores v8-v15 - * registers (they need to be saved/restored by callee according to ABI). - * This is required if the code needs to use all the NEON registers. - */ - -.macro default_init_need_all_regs -.endm - -.macro default_cleanup_need_all_regs -.endm - -/******************************************************************************/ - -/* - * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in) - * into a planar a8r8g8b8 format (with a, r, g, b color components - * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). - * - * Warning: the conversion is destructive and the original - * value (in) is lost. - */ -.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b - shrn &out_r&.8b, &in&.8h, #8 - shrn &out_g&.8b, &in&.8h, #3 - sli &in&.8h, &in&.8h, #5 - movi &out_a&.8b, #255 - sri &out_r&.8b, &out_r&.8b, #5 - sri &out_g&.8b, &out_g&.8b, #6 - shrn &out_b&.8b, &in&.8h, #2 -.endm - -.macro convert_0565_to_x888 in, out_r, out_g, out_b - shrn &out_r&.8b, &in&.8h, #8 - shrn &out_g&.8b, &in&.8h, #3 - sli &in&.8h, &in&.8h, #5 - sri &out_r&.8b, &out_r&.8b, #5 - sri &out_g&.8b, &out_g&.8b, #6 - shrn &out_b&.8b, &in&.8h, #2 -.endm - -/* - * Conversion from planar a8r8g8b8 format (with a, r, g, b color components - * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6 - * pixels packed in 128-bit register (out). Requires two temporary 128-bit - * registers (tmp1, tmp2) - */ -.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 - ushll &tmp1&.8h, &in_g&.8b, #7 - shl &tmp1&.8h, &tmp1&.8h, #1 - ushll &out&.8h, &in_r&.8b, #7 - shl &out&.8h, &out&.8h, #1 - ushll &tmp2&.8h, &in_b&.8b, #7 - shl &tmp2&.8h, &tmp2&.8h, #1 - sri &out&.8h, &tmp1&.8h, #5 - sri &out&.8h, &tmp2&.8h, #11 -.endm - -/* - * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels - * returned in (out0, out1) registers pair. Requires one temporary - * 64-bit register (tmp). 'out1' and 'in' may overlap, the original - * value from 'in' is lost - */ -.macro convert_four_0565_to_x888_packed in, out0, out1, tmp - shl &out0&.4h, &in&.4h, #5 /* G top 6 bits */ - shl &tmp&.4h, &in&.4h, #11 /* B top 5 bits */ - sri &in&.4h, &in&.4h, #5 /* R is ready in top bits */ - sri &out0&.4h, &out0&.4h, #6 /* G is ready in top bits */ - sri &tmp&.4h, &tmp&.4h, #5 /* B is ready in top bits */ - ushr &out1&.4h, &in&.4h, #8 /* R is in place */ - sri &out0&.4h, &tmp&.4h, #8 /* G & B is in place */ - zip1 &tmp&.4h, &out0&.4h, &out1&.4h /* everything is in place */ - zip2 &out1&.4h, &out0&.4h, &out1&.4h - mov &out0&.d[0], &tmp&.d[0] -.endm diff --git a/vendor/pixman/pixman/pixman-bits-image.c b/vendor/pixman/pixman/pixman-bits-image.c deleted file mode 100644 index 1698d7309..000000000 --- a/vendor/pixman/pixman/pixman-bits-image.c +++ /dev/null @@ -1,1383 +0,0 @@ -/* - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * 2008 Aaron Plattner, NVIDIA Corporation - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007, 2009 Red Hat, Inc. - * Copyright © 2008 André Tupinambá - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" -#include "dither/blue-noise-64x64.h" - -/* Fetch functions */ - -static force_inline void -fetch_pixel_no_alpha_32 (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds, - void *out) -{ - uint32_t *ret = out; - - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - *ret = 0; - else - *ret = image->fetch_pixel_32 (image, x, y); -} - -static force_inline void -fetch_pixel_no_alpha_float (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds, - void *out) -{ - argb_t *ret = out; - - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - ret->a = ret->r = ret->g = ret->b = 0.f; - else - *ret = image->fetch_pixel_float (image, x, y); -} - -typedef void (* get_pixel_t) (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds, void *out); - -static force_inline void -bits_image_fetch_pixel_nearest (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out) -{ - int x0 = pixman_fixed_to_int (x - pixman_fixed_e); - int y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (image->common.repeat != PIXMAN_REPEAT_NONE) - { - repeat (image->common.repeat, &x0, image->width); - repeat (image->common.repeat, &y0, image->height); - - get_pixel (image, x0, y0, FALSE, out); - } - else - { - get_pixel (image, x0, y0, TRUE, out); - } -} - -static force_inline void -bits_image_fetch_pixel_bilinear_32 (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out) -{ - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - int x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - uint32_t *ret = out; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = pixman_fixed_to_bilinear_weight (x1); - disty = pixman_fixed_to_bilinear_weight (y1); - - x1 = pixman_fixed_to_int (x1); - y1 = pixman_fixed_to_int (y1); - x2 = x1 + 1; - y2 = y1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &x1, width); - repeat (repeat_mode, &y1, height); - repeat (repeat_mode, &x2, width); - repeat (repeat_mode, &y2, height); - - get_pixel (image, x1, y1, FALSE, &tl); - get_pixel (image, x2, y1, FALSE, &tr); - get_pixel (image, x1, y2, FALSE, &bl); - get_pixel (image, x2, y2, FALSE, &br); - } - else - { - get_pixel (image, x1, y1, TRUE, &tl); - get_pixel (image, x2, y1, TRUE, &tr); - get_pixel (image, x1, y2, TRUE, &bl); - get_pixel (image, x2, y2, TRUE, &br); - } - - *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty); -} - -static force_inline void -bits_image_fetch_pixel_bilinear_float (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out) -{ - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - int x1, y1, x2, y2; - argb_t tl, tr, bl, br; - float distx, disty; - argb_t *ret = out; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = ((float)pixman_fixed_fraction(x1)) / 65536.f; - disty = ((float)pixman_fixed_fraction(y1)) / 65536.f; - - x1 = pixman_fixed_to_int (x1); - y1 = pixman_fixed_to_int (y1); - x2 = x1 + 1; - y2 = y1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &x1, width); - repeat (repeat_mode, &y1, height); - repeat (repeat_mode, &x2, width); - repeat (repeat_mode, &y2, height); - - get_pixel (image, x1, y1, FALSE, &tl); - get_pixel (image, x2, y1, FALSE, &tr); - get_pixel (image, x1, y2, FALSE, &bl); - get_pixel (image, x2, y2, FALSE, &br); - } - else - { - get_pixel (image, x1, y1, TRUE, &tl); - get_pixel (image, x2, y1, TRUE, &tr); - get_pixel (image, x1, y2, TRUE, &bl); - get_pixel (image, x2, y2, TRUE, &br); - } - - *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty); -} - -static force_inline void accum_32(unsigned int *satot, unsigned int *srtot, - unsigned int *sgtot, unsigned int *sbtot, - const void *p, pixman_fixed_t f) -{ - uint32_t pixel = *(uint32_t *)p; - - *srtot += (int)RED_8 (pixel) * f; - *sgtot += (int)GREEN_8 (pixel) * f; - *sbtot += (int)BLUE_8 (pixel) * f; - *satot += (int)ALPHA_8 (pixel) * f; -} - -static force_inline void reduce_32(unsigned int satot, unsigned int srtot, - unsigned int sgtot, unsigned int sbtot, - void *p) -{ - uint32_t *ret = p; - - satot = (int32_t)(satot + 0x8000) / 65536; - srtot = (int32_t)(srtot + 0x8000) / 65536; - sgtot = (int32_t)(sgtot + 0x8000) / 65536; - sbtot = (int32_t)(sbtot + 0x8000) / 65536; - - satot = CLIP ((int32_t)satot, 0, 0xff); - srtot = CLIP ((int32_t)srtot, 0, 0xff); - sgtot = CLIP ((int32_t)sgtot, 0, 0xff); - sbtot = CLIP ((int32_t)sbtot, 0, 0xff); - - *ret = ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); -} - -static force_inline void accum_float(unsigned int *satot, unsigned int *srtot, - unsigned int *sgtot, unsigned int *sbtot, - const void *p, pixman_fixed_t f) -{ - const argb_t *pixel = p; - - *satot += pixel->a * f; - *srtot += pixel->r * f; - *sgtot += pixel->g * f; - *sbtot += pixel->b * f; -} - -static force_inline void reduce_float(unsigned int satot, unsigned int srtot, - unsigned int sgtot, unsigned int sbtot, - void *p) -{ - argb_t *ret = p; - - ret->a = CLIP ((int32_t)satot / 65536.f, 0.f, 1.f); - ret->r = CLIP ((int32_t)srtot / 65536.f, 0.f, 1.f); - ret->g = CLIP ((int32_t)sgtot / 65536.f, 0.f, 1.f); - ret->b = CLIP ((int32_t)sbtot / 65536.f, 0.f, 1.f); -} - -typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot, - unsigned int *sgtot, unsigned int *sbtot, - const void *pixel, pixman_fixed_t f); - -typedef void (* reduce_pixel_t) (unsigned int satot, unsigned int srtot, - unsigned int sgtot, unsigned int sbtot, - void *out); - -static force_inline void -bits_image_fetch_pixel_convolution (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out, - accumulate_pixel_t accum, - reduce_pixel_t reduce) -{ - pixman_fixed_t *params = image->common.filter_params; - int x_off = (params[0] - pixman_fixed_1) >> 1; - int y_off = (params[1] - pixman_fixed_1) >> 1; - int32_t cwidth = pixman_fixed_to_int (params[0]); - int32_t cheight = pixman_fixed_to_int (params[1]); - int32_t i, j, x1, x2, y1, y2; - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - unsigned int srtot, sgtot, sbtot, satot; - - params += 2; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - srtot = sgtot = sbtot = satot = 0; - - for (i = y1; i < y2; ++i) - { - for (j = x1; j < x2; ++j) - { - int rx = j; - int ry = i; - - pixman_fixed_t f = *params; - - if (f) - { - /* Must be big enough to hold a argb_t */ - argb_t pixel; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &rx, width); - repeat (repeat_mode, &ry, height); - - get_pixel (image, rx, ry, FALSE, &pixel); - } - else - { - get_pixel (image, rx, ry, TRUE, &pixel); - } - - accum (&satot, &srtot, &sgtot, &sbtot, &pixel, f); - } - - params++; - } - } - - reduce (satot, srtot, sgtot, sbtot, out); -} - -static void -bits_image_fetch_pixel_separable_convolution (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out, - accumulate_pixel_t accum, - reduce_pixel_t reduce) -{ - pixman_fixed_t *params = image->common.filter_params; - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - int cwidth = pixman_fixed_to_int (params[0]); - int cheight = pixman_fixed_to_int (params[1]); - int x_phase_bits = pixman_fixed_to_int (params[2]); - int y_phase_bits = pixman_fixed_to_int (params[3]); - int x_phase_shift = 16 - x_phase_bits; - int y_phase_shift = 16 - y_phase_bits; - int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; - int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; - pixman_fixed_t *y_params; - unsigned int srtot, sgtot, sbtot, satot; - int32_t x1, x2, y1, y2; - int32_t px, py; - int i, j; - - /* Round x and y to the middle of the closest phase before continuing. This - * ensures that the convolution matrix is aligned right, since it was - * positioned relative to a particular phase (and not relative to whatever - * exact fraction we happen to get here). - */ - x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); - y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); - - px = (x & 0xffff) >> x_phase_shift; - py = (y & 0xffff) >> y_phase_shift; - - y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - srtot = sgtot = sbtot = satot = 0; - - for (i = y1; i < y2; ++i) - { - pixman_fixed_48_16_t fy = *y_params++; - pixman_fixed_t *x_params = params + 4 + px * cwidth; - - if (fy) - { - for (j = x1; j < x2; ++j) - { - pixman_fixed_t fx = *x_params++; - int rx = j; - int ry = i; - - if (fx) - { - /* Must be big enough to hold a argb_t */ - argb_t pixel; - pixman_fixed_t f; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &rx, width); - repeat (repeat_mode, &ry, height); - - get_pixel (image, rx, ry, FALSE, &pixel); - } - else - { - get_pixel (image, rx, ry, TRUE, &pixel); - } - - f = (fy * fx + 0x8000) >> 16; - - accum(&satot, &srtot, &sgtot, &sbtot, &pixel, f); - } - } - } - } - - - reduce(satot, srtot, sgtot, sbtot, out); -} - -static force_inline void -bits_image_fetch_pixel_filtered (bits_image_t *image, - pixman_bool_t wide, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel, - void *out) -{ - switch (image->common.filter) - { - case PIXMAN_FILTER_NEAREST: - case PIXMAN_FILTER_FAST: - bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out); - break; - - case PIXMAN_FILTER_BILINEAR: - case PIXMAN_FILTER_GOOD: - case PIXMAN_FILTER_BEST: - if (wide) - bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out); - else - bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out); - break; - - case PIXMAN_FILTER_CONVOLUTION: - if (wide) - { - bits_image_fetch_pixel_convolution (image, x, y, - get_pixel, out, - accum_float, - reduce_float); - } - else - { - bits_image_fetch_pixel_convolution (image, x, y, - get_pixel, out, - accum_32, reduce_32); - } - break; - - case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: - if (wide) - { - bits_image_fetch_pixel_separable_convolution (image, x, y, - get_pixel, out, - accum_float, - reduce_float); - } - else - { - bits_image_fetch_pixel_separable_convolution (image, x, y, - get_pixel, out, - accum_32, reduce_32); - } - break; - - default: - assert (0); - break; - } -} - -static uint32_t * -__bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, - pixman_bool_t wide, - const uint32_t * mask) -{ - pixman_image_t *image = iter->image; - int offset = iter->x; - int line = iter->y++; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - const uint32_t wide_zero[4] = {0}; - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - int i; - get_pixel_t get_pixel = - wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - } - else - { - ux = pixman_fixed_1; - uy = 0; - } - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - if (!mask || (!wide && mask[i]) || - (wide && memcmp(&mask[4 * i], wide_zero, 16) != 0)) - { - bits_image_fetch_pixel_filtered ( - &image->bits, wide, x, y, get_pixel, buffer); - } - - x += ux; - y += uy; - buffer += wide ? 4 : 1; - } - - return iter->buffer; -} - -static uint32_t * -bits_image_fetch_affine_no_alpha_32 (pixman_iter_t *iter, - const uint32_t *mask) -{ - return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask); -} - -static uint32_t * -bits_image_fetch_affine_no_alpha_float (pixman_iter_t *iter, - const uint32_t *mask) -{ - return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask); -} - -/* General fetcher */ -static force_inline void -fetch_pixel_general_32 (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds, - void *out) -{ - uint32_t pixel, *ret = out; - - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - { - *ret = 0; - return; - } - - pixel = image->fetch_pixel_32 (image, x, y); - - if (image->common.alpha_map) - { - uint32_t pixel_a; - - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - if (x < 0 || x >= image->common.alpha_map->width || - y < 0 || y >= image->common.alpha_map->height) - { - pixel_a = 0; - } - else - { - pixel_a = image->common.alpha_map->fetch_pixel_32 ( - image->common.alpha_map, x, y); - - pixel_a = ALPHA_8 (pixel_a); - } - - pixel &= 0x00ffffff; - pixel |= (pixel_a << 24); - } - - *ret = pixel; -} - -static force_inline void -fetch_pixel_general_float (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds, - void *out) -{ - argb_t *ret = out; - - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - { - ret->a = ret->r = ret->g = ret->b = 0; - return; - } - - *ret = image->fetch_pixel_float (image, x, y); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - if (x < 0 || x >= image->common.alpha_map->width || - y < 0 || y >= image->common.alpha_map->height) - { - ret->a = 0.f; - } - else - { - argb_t alpha; - - alpha = image->common.alpha_map->fetch_pixel_float ( - image->common.alpha_map, x, y); - - ret->a = alpha.a; - } - } -} - -static uint32_t * -__bits_image_fetch_general (pixman_iter_t *iter, - pixman_bool_t wide, - const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int offset = iter->x; - int line = iter->y++; - int width = iter->width; - uint32_t * buffer = iter->buffer; - get_pixel_t get_pixel = - wide ? fetch_pixel_general_float : fetch_pixel_general_32; - - const uint32_t wide_zero[4] = {0}; - pixman_fixed_t x, y, w; - pixman_fixed_t ux, uy, uw; - pixman_vector_t v; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return buffer; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - uw = image->common.transform->matrix[2][0]; - } - else - { - ux = pixman_fixed_1; - uy = 0; - uw = 0; - } - - x = v.vector[0]; - y = v.vector[1]; - w = v.vector[2]; - - for (i = 0; i < width; ++i) - { - pixman_fixed_t x0, y0; - - if (!mask || (!wide && mask[i]) || - (wide && memcmp(&mask[4 * i], wide_zero, 16) != 0)) - { - if (w != 0) - { - x0 = ((uint64_t)x << 16) / w; - y0 = ((uint64_t)y << 16) / w; - } - else - { - x0 = 0; - y0 = 0; - } - - bits_image_fetch_pixel_filtered ( - &image->bits, wide, x0, y0, get_pixel, buffer); - } - - x += ux; - y += uy; - w += uw; - buffer += wide ? 4 : 1; - } - - return iter->buffer; -} - -static uint32_t * -bits_image_fetch_general_32 (pixman_iter_t *iter, - const uint32_t *mask) -{ - return __bits_image_fetch_general(iter, FALSE, mask); -} - -static uint32_t * -bits_image_fetch_general_float (pixman_iter_t *iter, - const uint32_t *mask) -{ - return __bits_image_fetch_general(iter, TRUE, mask); -} - -static void -replicate_pixel_32 (bits_image_t * bits, - int x, - int y, - int width, - uint32_t * buffer) -{ - uint32_t color; - uint32_t *end; - - color = bits->fetch_pixel_32 (bits, x, y); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; -} - -static void -replicate_pixel_float (bits_image_t * bits, - int x, - int y, - int width, - uint32_t * b) -{ - argb_t color; - argb_t *buffer = (argb_t *)b; - argb_t *end; - - color = bits->fetch_pixel_float (bits, x, y); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; -} - -static void -bits_image_fetch_untransformed_repeat_none (bits_image_t *image, - pixman_bool_t wide, - int x, - int y, - int width, - uint32_t * buffer) -{ - uint32_t w; - - if (y < 0 || y >= image->height) - { - memset (buffer, 0, width * (wide? sizeof (argb_t) : 4)); - return; - } - - if (x < 0) - { - w = MIN (width, -x); - - memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4)); - - width -= w; - buffer += w * (wide? 4 : 1); - x += w; - } - - if (x < image->width) - { - w = MIN (width, image->width - x); - - if (wide) - image->fetch_scanline_float (image, x, y, w, buffer, NULL); - else - image->fetch_scanline_32 (image, x, y, w, buffer, NULL); - - width -= w; - buffer += w * (wide? 4 : 1); - x += w; - } - - memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4)); -} - -static void -bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, - pixman_bool_t wide, - int x, - int y, - int width, - uint32_t * buffer) -{ - uint32_t w; - - while (y < 0) - y += image->height; - - while (y >= image->height) - y -= image->height; - - if (image->width == 1) - { - if (wide) - replicate_pixel_float (image, 0, y, width, buffer); - else - replicate_pixel_32 (image, 0, y, width, buffer); - - return; - } - - while (width) - { - while (x < 0) - x += image->width; - while (x >= image->width) - x -= image->width; - - w = MIN (width, image->width - x); - - if (wide) - image->fetch_scanline_float (image, x, y, w, buffer, NULL); - else - image->fetch_scanline_32 (image, x, y, w, buffer, NULL); - - buffer += w * (wide? 4 : 1); - x += w; - width -= w; - } -} - -static uint32_t * -bits_image_fetch_untransformed_32 (pixman_iter_t * iter, - const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - if (image->common.repeat == PIXMAN_REPEAT_NONE) - { - bits_image_fetch_untransformed_repeat_none ( - &image->bits, FALSE, x, y, width, buffer); - } - else - { - bits_image_fetch_untransformed_repeat_normal ( - &image->bits, FALSE, x, y, width, buffer); - } - - iter->y++; - return buffer; -} - -static uint32_t * -bits_image_fetch_untransformed_float (pixman_iter_t * iter, - const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - if (image->common.repeat == PIXMAN_REPEAT_NONE) - { - bits_image_fetch_untransformed_repeat_none ( - &image->bits, TRUE, x, y, width, buffer); - } - else - { - bits_image_fetch_untransformed_repeat_normal ( - &image->bits, TRUE, x, y, width, buffer); - } - - iter->y++; - return buffer; -} - -typedef struct -{ - pixman_format_code_t format; - uint32_t flags; - pixman_iter_get_scanline_t get_scanline_32; - pixman_iter_get_scanline_t get_scanline_float; -} fetcher_info_t; - -static const fetcher_info_t fetcher_info[] = -{ - { PIXMAN_any, - (FAST_PATH_NO_ALPHA_MAP | - FAST_PATH_ID_TRANSFORM | - FAST_PATH_NO_CONVOLUTION_FILTER | - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_REFLECT_REPEAT), - bits_image_fetch_untransformed_32, - bits_image_fetch_untransformed_float - }, - - /* Affine, no alpha */ - { PIXMAN_any, - (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), - bits_image_fetch_affine_no_alpha_32, - bits_image_fetch_affine_no_alpha_float, - }, - - /* General */ - { PIXMAN_any, - 0, - bits_image_fetch_general_32, - bits_image_fetch_general_float, - }, - - { PIXMAN_null }, -}; - -static void -bits_image_property_changed (pixman_image_t *image) -{ - _pixman_bits_image_setup_accessors (&image->bits); -} - -void -_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - pixman_format_code_t format = image->common.extended_format_code; - uint32_t flags = image->common.flags; - const fetcher_info_t *info; - - for (info = fetcher_info; info->format != PIXMAN_null; ++info) - { - if ((info->format == format || info->format == PIXMAN_any) && - (info->flags & flags) == info->flags) - { - if (iter->iter_flags & ITER_NARROW) - { - iter->get_scanline = info->get_scanline_32; - } - else - { - iter->get_scanline = info->get_scanline_float; - } - return; - } - } - - /* Just in case we somehow didn't find a scanline function */ - iter->get_scanline = _pixman_iter_get_scanline_noop; -} - -static uint32_t * -dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask); - if (image->common.alpha_map) - { - uint32_t *alpha; - - if ((alpha = malloc (width * sizeof (uint32_t)))) - { - int i; - - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->fetch_scanline_32 ( - image->common.alpha_map, x, y, width, alpha, mask); - - for (i = 0; i < width; ++i) - { - buffer[i] &= ~0xff000000; - buffer[i] |= (alpha[i] & 0xff000000); - } - - free (alpha); - } - } - - return iter->buffer; -} - -static uint32_t * -dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - argb_t * buffer = (argb_t *)iter->buffer; - - image->fetch_scanline_float ( - image, x, y, width, (uint32_t *)buffer, mask); - if (image->common.alpha_map) - { - argb_t *alpha; - - if ((alpha = malloc (width * sizeof (argb_t)))) - { - int i; - - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->fetch_scanline_float ( - image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask); - - for (i = 0; i < width; ++i) - buffer[i].a = alpha[i].a; - - free (alpha); - } - } - - return iter->buffer; -} - -static void -dest_write_back_narrow (pixman_iter_t *iter) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - const uint32_t *buffer = iter->buffer; - - image->store_scanline_32 (image, x, y, width, buffer); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->store_scanline_32 ( - image->common.alpha_map, x, y, width, buffer); - } - - iter->y++; -} - -static float -dither_factor_blue_noise_64 (int x, int y) -{ - float m = dither_blue_noise_64x64[((y & 0x3f) << 6) | (x & 0x3f)]; - return m * (1. / 4096.f) + (1. / 8192.f); -} - -static float -dither_factor_bayer_8 (int x, int y) -{ - uint32_t m; - - y ^= x; - - /* Compute reverse(interleave(xor(x mod n, y mod n), x mod n)) - * Here n = 8 and `mod n` is the bottom 3 bits. - */ - m = ((y & 0x1) << 5) | ((x & 0x1) << 4) | - ((y & 0x2) << 2) | ((x & 0x2) << 1) | - ((y & 0x4) >> 1) | ((x & 0x4) >> 2); - - /* m is in range [0, 63]. We scale it to [0, 63.0f/64.0f], then - * shift it to to [1.0f/128.0f, 127.0f/128.0f] so that 0 < d < 1. - * This ensures exact values are not changed by dithering. - */ - return (float)(m) * (1 / 64.0f) + (1.0f / 128.0f); -} - -typedef float (* dither_factor_t)(int x, int y); - -static force_inline float -dither_apply_channel (float f, float d, float s) -{ - /* float_to_unorm splits the [0, 1] segment in (1 << n_bits) - * subsections of equal length; however unorm_to_float does not - * map to the center of those sections. In fact, pixel value u is - * mapped to: - * - * u u u 1 - * -------------- = ---------- + -------------- * ---------- - * 2^n_bits - 1 2^n_bits 2^n_bits - 1 2^n_bits - * - * Hence if f = u / (2^n_bits - 1) is exactly representable on a - * n_bits palette, all the numbers between - * - * u - * ---------- = f - f * 2^n_bits = f + (0 - f) * 2^n_bits - * 2^n_bits - * - * and - * - * u + 1 - * ---------- = f - (f - 1) * 2^n_bits = f + (1 - f) * 2^n_bits - * 2^n_bits - * - * are also mapped back to u. - * - * Hence the following calculation ensures that we add as much - * noise as possible without perturbing values which are exactly - * representable in the target colorspace. Note that this corresponds to - * mixing the original color with noise with a ratio of `1 / 2^n_bits`. - */ - return f + (d - f) * s; -} - -static force_inline float -dither_compute_scale (int n_bits) -{ - // No dithering for wide formats - if (n_bits == 0 || n_bits >= 32) - return 0.f; - - return 1.f / (float)(1 << n_bits); -} - -static const uint32_t * -dither_apply_ordered (pixman_iter_t *iter, dither_factor_t factor) -{ - bits_image_t *image = &iter->image->bits; - int x = iter->x + image->dither_offset_x; - int y = iter->y + image->dither_offset_y; - int width = iter->width; - argb_t *buffer = (argb_t *)iter->buffer; - - pixman_format_code_t format = image->format; - int a_size = PIXMAN_FORMAT_A (format); - int r_size = PIXMAN_FORMAT_R (format); - int g_size = PIXMAN_FORMAT_G (format); - int b_size = PIXMAN_FORMAT_B (format); - - float a_scale = dither_compute_scale (a_size); - float r_scale = dither_compute_scale (r_size); - float g_scale = dither_compute_scale (g_size); - float b_scale = dither_compute_scale (b_size); - - int i; - float d; - - for (i = 0; i < width; ++i) - { - d = factor (x + i, y); - - buffer->a = dither_apply_channel (buffer->a, d, a_scale); - buffer->r = dither_apply_channel (buffer->r, d, r_scale); - buffer->g = dither_apply_channel (buffer->g, d, g_scale); - buffer->b = dither_apply_channel (buffer->b, d, b_scale); - - buffer++; - } - - return iter->buffer; -} - -static void -dest_write_back_wide (pixman_iter_t *iter) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - const uint32_t *buffer = iter->buffer; - - switch (image->dither) - { - case PIXMAN_DITHER_NONE: - break; - - case PIXMAN_DITHER_GOOD: - case PIXMAN_DITHER_BEST: - case PIXMAN_DITHER_ORDERED_BLUE_NOISE_64: - buffer = dither_apply_ordered (iter, dither_factor_blue_noise_64); - break; - - case PIXMAN_DITHER_FAST: - case PIXMAN_DITHER_ORDERED_BAYER_8: - buffer = dither_apply_ordered (iter, dither_factor_bayer_8); - break; - } - - image->store_scanline_float (image, x, y, width, buffer); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->store_scanline_float ( - image->common.alpha_map, x, y, width, buffer); - } - - iter->y++; -} - -void -_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->iter_flags & ITER_NARROW) - { - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - iter->get_scanline = dest_get_scanline_narrow; - } - - iter->write_back = dest_write_back_narrow; - } - else - { - iter->get_scanline = dest_get_scanline_wide; - iter->write_back = dest_write_back_wide; - } -} - -static uint32_t * -create_bits (pixman_format_code_t format, - int width, - int height, - int * rowstride_bytes, - pixman_bool_t clear) -{ - int stride; - size_t buf_size; - int bpp; - - /* what follows is a long-winded way, avoiding any possibility of integer - * overflows, of saying: - * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); - */ - - bpp = PIXMAN_FORMAT_BPP (format); - if (_pixman_multiply_overflows_int (width, bpp)) - return NULL; - - stride = width * bpp; - if (_pixman_addition_overflows_int (stride, 0x1f)) - return NULL; - - stride += 0x1f; - stride >>= 5; - - stride *= sizeof (uint32_t); - - if (_pixman_multiply_overflows_size (height, stride)) - return NULL; - - buf_size = (size_t)height * stride; - - if (rowstride_bytes) - *rowstride_bytes = stride; - - if (clear) - return calloc (buf_size, 1); - else - return malloc (buf_size); -} - -pixman_bool_t -_pixman_bits_image_init (pixman_image_t * image, - pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride, - pixman_bool_t clear) -{ - uint32_t *free_me = NULL; - - if (PIXMAN_FORMAT_BPP (format) == 128) - return_val_if_fail(!(rowstride % 4), FALSE); - - if (!bits && width && height) - { - int rowstride_bytes; - - free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear); - - if (!bits) - return FALSE; - - rowstride = rowstride_bytes / (int) sizeof (uint32_t); - } - - _pixman_image_init (image); - - image->type = BITS; - image->bits.format = format; - image->bits.width = width; - image->bits.height = height; - image->bits.bits = bits; - image->bits.free_me = free_me; - image->bits.dither = PIXMAN_DITHER_NONE; - image->bits.dither_offset_x = 0; - image->bits.dither_offset_y = 0; - image->bits.read_func = NULL; - image->bits.write_func = NULL; - image->bits.rowstride = rowstride; - image->bits.indexed = NULL; - - image->common.property_changed = bits_image_property_changed; - - _pixman_image_reset_clip_region (image); - - return TRUE; -} - -static pixman_image_t * -create_bits_image_internal (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes, - pixman_bool_t clear) -{ - pixman_image_t *image; - - /* must be a whole number of uint32_t's - */ - return_val_if_fail ( - bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); - - return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); - - image = _pixman_image_allocate (); - - if (!image) - return NULL; - - if (!_pixman_bits_image_init (image, format, width, height, bits, - rowstride_bytes / (int) sizeof (uint32_t), - clear)) - { - free (image); - return NULL; - } - - return image; -} - -/* If bits is NULL, a buffer will be allocated and initialized to 0 */ -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes) -{ - return create_bits_image_internal ( - format, width, height, bits, rowstride_bytes, TRUE); -} - - -/* If bits is NULL, a buffer will be allocated and _not_ initialized */ -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits_no_clear (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes) -{ - return create_bits_image_internal ( - format, width, height, bits, rowstride_bytes, FALSE); -} diff --git a/vendor/pixman/pixman/pixman-combine-float.c b/vendor/pixman/pixman/pixman-combine-float.c deleted file mode 100644 index 27392d608..000000000 --- a/vendor/pixman/pixman/pixman-combine-float.c +++ /dev/null @@ -1,1158 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2010, 2012 Soren Sandmann Pedersen - * Copyright © 2010, 2012 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Soren Sandmann Pedersen (sandmann@cs.au.dk) - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include - -#include "pixman-private.h" - -/* Workaround for http://gcc.gnu.org/PR54965 */ -/* GCC 4.6 has problems with force_inline, so just use normal inline instead */ -#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6) -#undef force_inline -#define force_inline __inline__ -#endif - -typedef float (* combine_channel_t) (float sa, float s, float da, float d); - -static force_inline void -combine_inner (pixman_bool_t component, - float *dest, const float *src, const float *mask, int n_pixels, - combine_channel_t combine_a, combine_channel_t combine_c) -{ - int i; - - if (!mask) - { - for (i = 0; i < 4 * n_pixels; i += 4) - { - float sa = src[i + 0]; - float sr = src[i + 1]; - float sg = src[i + 2]; - float sb = src[i + 3]; - - float da = dest[i + 0]; - float dr = dest[i + 1]; - float dg = dest[i + 2]; - float db = dest[i + 3]; - - dest[i + 0] = combine_a (sa, sa, da, da); - dest[i + 1] = combine_c (sa, sr, da, dr); - dest[i + 2] = combine_c (sa, sg, da, dg); - dest[i + 3] = combine_c (sa, sb, da, db); - } - } - else - { - for (i = 0; i < 4 * n_pixels; i += 4) - { - float sa, sr, sg, sb; - float ma, mr, mg, mb; - float da, dr, dg, db; - - sa = src[i + 0]; - sr = src[i + 1]; - sg = src[i + 2]; - sb = src[i + 3]; - - if (component) - { - ma = mask[i + 0]; - mr = mask[i + 1]; - mg = mask[i + 2]; - mb = mask[i + 3]; - - sr *= mr; - sg *= mg; - sb *= mb; - - ma *= sa; - mr *= sa; - mg *= sa; - mb *= sa; - - sa = ma; - } - else - { - ma = mask[i + 0]; - - sa *= ma; - sr *= ma; - sg *= ma; - sb *= ma; - - ma = mr = mg = mb = sa; - } - - da = dest[i + 0]; - dr = dest[i + 1]; - dg = dest[i + 2]; - db = dest[i + 3]; - - dest[i + 0] = combine_a (ma, sa, da, da); - dest[i + 1] = combine_c (mr, sr, da, dr); - dest[i + 2] = combine_c (mg, sg, da, dg); - dest[i + 3] = combine_c (mb, sb, da, db); - } - } -} - -#define MAKE_COMBINER(name, component, combine_a, combine_c) \ - static void \ - combine_ ## name ## _float (pixman_implementation_t *imp, \ - pixman_op_t op, \ - float *dest, \ - const float *src, \ - const float *mask, \ - int n_pixels) \ - { \ - combine_inner (component, dest, src, mask, n_pixels, \ - combine_a, combine_c); \ - } - -#define MAKE_COMBINERS(name, combine_a, combine_c) \ - MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c) \ - MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c) - - -/* - * Porter/Duff operators - */ -typedef enum -{ - ZERO, - ONE, - SRC_ALPHA, - DEST_ALPHA, - INV_SA, - INV_DA, - SA_OVER_DA, - DA_OVER_SA, - INV_SA_OVER_DA, - INV_DA_OVER_SA, - ONE_MINUS_SA_OVER_DA, - ONE_MINUS_DA_OVER_SA, - ONE_MINUS_INV_DA_OVER_SA, - ONE_MINUS_INV_SA_OVER_DA -} combine_factor_t; - -#define CLAMP(f) \ - (((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f))) - -static force_inline float -get_factor (combine_factor_t factor, float sa, float da) -{ - float f = -1; - - switch (factor) - { - case ZERO: - f = 0.0f; - break; - - case ONE: - f = 1.0f; - break; - - case SRC_ALPHA: - f = sa; - break; - - case DEST_ALPHA: - f = da; - break; - - case INV_SA: - f = 1 - sa; - break; - - case INV_DA: - f = 1 - da; - break; - - case SA_OVER_DA: - if (FLOAT_IS_ZERO (da)) - f = 1.0f; - else - f = CLAMP (sa / da); - break; - - case DA_OVER_SA: - if (FLOAT_IS_ZERO (sa)) - f = 1.0f; - else - f = CLAMP (da / sa); - break; - - case INV_SA_OVER_DA: - if (FLOAT_IS_ZERO (da)) - f = 1.0f; - else - f = CLAMP ((1.0f - sa) / da); - break; - - case INV_DA_OVER_SA: - if (FLOAT_IS_ZERO (sa)) - f = 1.0f; - else - f = CLAMP ((1.0f - da) / sa); - break; - - case ONE_MINUS_SA_OVER_DA: - if (FLOAT_IS_ZERO (da)) - f = 0.0f; - else - f = CLAMP (1.0f - sa / da); - break; - - case ONE_MINUS_DA_OVER_SA: - if (FLOAT_IS_ZERO (sa)) - f = 0.0f; - else - f = CLAMP (1.0f - da / sa); - break; - - case ONE_MINUS_INV_DA_OVER_SA: - if (FLOAT_IS_ZERO (sa)) - f = 0.0f; - else - f = CLAMP (1.0f - (1.0f - da) / sa); - break; - - case ONE_MINUS_INV_SA_OVER_DA: - if (FLOAT_IS_ZERO (da)) - f = 0.0f; - else - f = CLAMP (1.0f - (1.0f - sa) / da); - break; - } - - return f; -} - -#define MAKE_PD_COMBINERS(name, a, b) \ - static float force_inline \ - pd_combine_ ## name (float sa, float s, float da, float d) \ - { \ - const float fa = get_factor (a, sa, da); \ - const float fb = get_factor (b, sa, da); \ - \ - return MIN (1.0f, s * fa + d * fb); \ - } \ - \ - MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name) - -MAKE_PD_COMBINERS (clear, ZERO, ZERO) -MAKE_PD_COMBINERS (src, ONE, ZERO) -MAKE_PD_COMBINERS (dst, ZERO, ONE) -MAKE_PD_COMBINERS (over, ONE, INV_SA) -MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE) -MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO) -MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA) -MAKE_PD_COMBINERS (out, INV_DA, ZERO) -MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA) -MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA) -MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA) -MAKE_PD_COMBINERS (xor, INV_DA, INV_SA) -MAKE_PD_COMBINERS (add, ONE, ONE) - -MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE) - -MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO) -MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO) -MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE) -MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA) -MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE) -MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO) -MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA) -MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO) -MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA) -MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA) -MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA) -MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA) - -MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO) -MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO) -MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE) -MAKE_PD_COMBINERS (conjoint_over, ONE, ONE_MINUS_SA_OVER_DA) -MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE) -MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO) -MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA) -MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO) -MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA) -MAKE_PD_COMBINERS (conjoint_atop, DA_OVER_SA, ONE_MINUS_SA_OVER_DA) -MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA) -MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) - -/* - * PDF blend modes: - * - * The following blend modes have been taken from the PDF ISO 32000 - * specification, which at this point in time is available from - * - * http://www.adobe.com/devnet/pdf/pdf_reference.html - * - * The specific documents of interest are the PDF spec itself: - * - * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf - * - * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat - * 9.1 and Reader 9.1: - * - * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf - * - * that clarifies the specifications for blend modes ColorDodge and - * ColorBurn. - * - * The formula for computing the final pixel color given in 11.3.6 is: - * - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * - * with B() is the blend function. When B(Cb, Cs) = Cs, this formula - * reduces to the regular OVER operator. - * - * Cs and Cb are not premultiplied, so in our implementation we instead - * use: - * - * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) - * - * where cr, cs, and cb are premultiplied colors, and where the - * - * αb × αs × B(cb/αb, cs/αs) - * - * part is first arithmetically simplified under the assumption that αb - * and αs are not 0, and then updated to produce a meaningful result when - * they are. - * - * For all the blend mode operators, the alpha channel is given by - * - * αr = αs + αb + αb × αs - */ - -#define MAKE_SEPARABLE_PDF_COMBINERS(name) \ - static force_inline float \ - combine_ ## name ## _a (float sa, float s, float da, float d) \ - { \ - return da + sa - da * sa; \ - } \ - \ - static force_inline float \ - combine_ ## name ## _c (float sa, float s, float da, float d) \ - { \ - float f = (1 - sa) * d + (1 - da) * s; \ - \ - return f + blend_ ## name (sa, s, da, d); \ - } \ - \ - MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c) - -/* - * Multiply - * - * ad * as * B(d / ad, s / as) - * = ad * as * d/ad * s/as - * = d * s - * - */ -static force_inline float -blend_multiply (float sa, float s, float da, float d) -{ - return d * s; -} - -/* - * Screen - * - * ad * as * B(d/ad, s/as) - * = ad * as * (d/ad + s/as - s/as * d/ad) - * = ad * s + as * d - s * d - */ -static force_inline float -blend_screen (float sa, float s, float da, float d) -{ - return d * sa + s * da - s * d; -} - -/* - * Overlay - * - * ad * as * B(d/ad, s/as) - * = ad * as * Hardlight (s, d) - * = if (d / ad < 0.5) - * as * ad * Multiply (s/as, 2 * d/ad) - * else - * as * ad * Screen (s/as, 2 * d / ad - 1) - * = if (d < 0.5 * ad) - * as * ad * s/as * 2 * d /ad - * else - * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) - * = if (2 * d < ad) - * 2 * s * d - * else - * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) - * = if (2 * d < ad) - * 2 * s * d - * else - * as * ad - 2 * (ad - d) * (as - s) - */ -static force_inline float -blend_overlay (float sa, float s, float da, float d) -{ - if (2 * d < da) - return 2 * s * d; - else - return sa * da - 2 * (da - d) * (sa - s); -} - -/* - * Darken - * - * ad * as * B(d/ad, s/as) - * = ad * as * MIN(d/ad, s/as) - * = MIN (as * d, ad * s) - */ -static force_inline float -blend_darken (float sa, float s, float da, float d) -{ - s = s * da; - d = d * sa; - - if (s > d) - return d; - else - return s; -} - -/* - * Lighten - * - * ad * as * B(d/ad, s/as) - * = ad * as * MAX(d/ad, s/as) - * = MAX (as * d, ad * s) - */ -static force_inline float -blend_lighten (float sa, float s, float da, float d) -{ - s = s * da; - d = d * sa; - - if (s > d) - return s; - else - return d; -} - -/* - * Color dodge - * - * ad * as * B(d/ad, s/as) - * = if d/ad = 0 - * ad * as * 0 - * else if (d/ad >= (1 - s/as) - * ad * as * 1 - * else - * ad * as * ((d/ad) / (1 - s/as)) - * = if d = 0 - * 0 - * elif as * d >= ad * (as - s) - * ad * as - * else - * as * (as * d / (as - s)) - * - */ -static force_inline float -blend_color_dodge (float sa, float s, float da, float d) -{ - if (FLOAT_IS_ZERO (d)) - return 0.0f; - else if (d * sa >= sa * da - s * da) - return sa * da; - else if (FLOAT_IS_ZERO (sa - s)) - return sa * da; - else - return sa * sa * d / (sa - s); -} - -/* - * Color burn - * - * We modify the first clause "if d = 1" to "if d >= 1" since with - * premultiplied colors d > 1 can actually happen. - * - * ad * as * B(d/ad, s/as) - * = if d/ad >= 1 - * ad * as * 1 - * elif (1 - d/ad) >= s/as - * ad * as * 0 - * else - * ad * as * (1 - ((1 - d/ad) / (s/as))) - * = if d >= ad - * ad * as - * elif as * ad - as * d >= ad * s - * 0 - * else - * ad * as - as * as * (ad - d) / s - */ -static force_inline float -blend_color_burn (float sa, float s, float da, float d) -{ - if (d >= da) - return sa * da; - else if (sa * (da - d) >= s * da) - return 0.0f; - else if (FLOAT_IS_ZERO (s)) - return 0.0f; - else - return sa * (da - sa * (da - d) / s); -} - -/* - * Hard light - * - * ad * as * B(d/ad, s/as) - * = if (s/as <= 0.5) - * ad * as * Multiply (d/ad, 2 * s/as) - * else - * ad * as * Screen (d/ad, 2 * s/as - 1) - * = if 2 * s <= as - * ad * as * d/ad * 2 * s / as - * else - * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1)) - * = if 2 * s <= as - * 2 * s * d - * else - * as * ad - 2 * (ad - d) * (as - s) - */ -static force_inline float -blend_hard_light (float sa, float s, float da, float d) -{ - if (2 * s < sa) - return 2 * s * d; - else - return sa * da - 2 * (da - d) * (sa - s); -} - -/* - * Soft light - * - * ad * as * B(d/ad, s/as) - * = if (s/as <= 0.5) - * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad)) - * else if (d/ad <= 0.25) - * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad)) - * else - * ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad)) - * = if (2 * s <= as) - * d * as - d * (ad - d) * (as - 2 * s) / ad; - * else if (4 * d <= ad) - * (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); - * else - * d * as + (sqrt (d * ad) - d) * (2 * s - as); - */ -static force_inline float -blend_soft_light (float sa, float s, float da, float d) -{ - if (2 * s <= sa) - { - if (FLOAT_IS_ZERO (da)) - return d * sa; - else - return d * sa - d * (da - d) * (sa - 2 * s) / da; - } - else - { - if (FLOAT_IS_ZERO (da)) - { - return d * sa; - } - else - { - if (4 * d <= da) - return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3); - else - return d * sa + (sqrtf (d * da) - d) * (2 * s - sa); - } - } -} - -/* - * Difference - * - * ad * as * B(s/as, d/ad) - * = ad * as * abs (s/as - d/ad) - * = if (s/as <= d/ad) - * ad * as * (d/ad - s/as) - * else - * ad * as * (s/as - d/ad) - * = if (ad * s <= as * d) - * as * d - ad * s - * else - * ad * s - as * d - */ -static force_inline float -blend_difference (float sa, float s, float da, float d) -{ - float dsa = d * sa; - float sda = s * da; - - if (sda < dsa) - return dsa - sda; - else - return sda - dsa; -} - -/* - * Exclusion - * - * ad * as * B(s/as, d/ad) - * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) - * = as * d + ad * s - 2 * s * d - */ -static force_inline float -blend_exclusion (float sa, float s, float da, float d) -{ - return s * da + d * sa - 2 * d * s; -} - -MAKE_SEPARABLE_PDF_COMBINERS (multiply) -MAKE_SEPARABLE_PDF_COMBINERS (screen) -MAKE_SEPARABLE_PDF_COMBINERS (overlay) -MAKE_SEPARABLE_PDF_COMBINERS (darken) -MAKE_SEPARABLE_PDF_COMBINERS (lighten) -MAKE_SEPARABLE_PDF_COMBINERS (color_dodge) -MAKE_SEPARABLE_PDF_COMBINERS (color_burn) -MAKE_SEPARABLE_PDF_COMBINERS (hard_light) -MAKE_SEPARABLE_PDF_COMBINERS (soft_light) -MAKE_SEPARABLE_PDF_COMBINERS (difference) -MAKE_SEPARABLE_PDF_COMBINERS (exclusion) - -/* - * PDF nonseperable blend modes are implemented using the following functions - * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid - * and min value of the red, green and blue components. - * - * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue - * - * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + (((C – l) × l) ⁄ (l – min)) - * if x > 1.0 - * C = l + (((C – l) × (1 – l) ) ⁄ (max – l)) - * return C - * - * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) - * - * SAT (C) = CH_MAX (C) - CH_MIN (C) - * - * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C - */ - -/* For premultiplied colors, we need to know what happens when C is - * multiplied by a real number. LUM and SAT are linear: - * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) - * - * If we extend clip_color with an extra argument a and change - * - * if x >= 1.0 - * - * into - * - * if x >= a - * - * then clip_color is also linear: - * - * r * clip_color (C, a) = clip_color (r * C, r * a); - * - * for positive r. - * - * Similarly, we can extend set_lum with an extra argument that is just passed - * on to clip_color: - * - * r * set_lum (C, l, a) - * - * = r × clip_color (C + l - LUM (C), a) - * - * = clip_color (r * C + r × l - r * LUM (C), r * a) - * - * = set_lum (r * C, r * l, r * a) - * - * Finally, set_sat: - * - * r * set_sat (C, s) = set_sat (x * C, r * s) - * - * The above holds for all non-zero x, because the x'es in the fraction for - * C_mid cancel out. Specifically, it holds for x = r: - * - * r * set_sat (C, s) = set_sat (r * C, r * s) - * - */ -typedef struct -{ - float r; - float g; - float b; -} rgb_t; - -static force_inline float -minf (float a, float b) -{ - return a < b? a : b; -} - -static force_inline float -maxf (float a, float b) -{ - return a > b? a : b; -} - -static force_inline float -channel_min (const rgb_t *c) -{ - return minf (minf (c->r, c->g), c->b); -} - -static force_inline float -channel_max (const rgb_t *c) -{ - return maxf (maxf (c->r, c->g), c->b); -} - -static force_inline float -get_lum (const rgb_t *c) -{ - return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f; -} - -static force_inline float -get_sat (const rgb_t *c) -{ - return channel_max (c) - channel_min (c); -} - -static void -clip_color (rgb_t *color, float a) -{ - float l = get_lum (color); - float n = channel_min (color); - float x = channel_max (color); - float t; - - if (n < 0.0f) - { - t = l - n; - if (FLOAT_IS_ZERO (t)) - { - color->r = 0.0f; - color->g = 0.0f; - color->b = 0.0f; - } - else - { - color->r = l + (((color->r - l) * l) / t); - color->g = l + (((color->g - l) * l) / t); - color->b = l + (((color->b - l) * l) / t); - } - } - if (x > a) - { - t = x - l; - if (FLOAT_IS_ZERO (t)) - { - color->r = a; - color->g = a; - color->b = a; - } - else - { - color->r = l + (((color->r - l) * (a - l) / t)); - color->g = l + (((color->g - l) * (a - l) / t)); - color->b = l + (((color->b - l) * (a - l) / t)); - } - } -} - -static void -set_lum (rgb_t *color, float sa, float l) -{ - float d = l - get_lum (color); - - color->r = color->r + d; - color->g = color->g + d; - color->b = color->b + d; - - clip_color (color, sa); -} - -static void -set_sat (rgb_t *src, float sat) -{ - float *max, *mid, *min; - float t; - - if (src->r > src->g) - { - if (src->r > src->b) - { - max = &(src->r); - - if (src->g > src->b) - { - mid = &(src->g); - min = &(src->b); - } - else - { - mid = &(src->b); - min = &(src->g); - } - } - else - { - max = &(src->b); - mid = &(src->r); - min = &(src->g); - } - } - else - { - if (src->r > src->b) - { - max = &(src->g); - mid = &(src->r); - min = &(src->b); - } - else - { - min = &(src->r); - - if (src->g > src->b) - { - max = &(src->g); - mid = &(src->b); - } - else - { - max = &(src->b); - mid = &(src->g); - } - } - } - - t = *max - *min; - - if (FLOAT_IS_ZERO (t)) - { - *mid = *max = 0.0f; - } - else - { - *mid = ((*mid - *min) * sat) / t; - *max = sat; - } - - *min = 0.0f; -} - -/* Hue: - * - * as * ad * B(s/as, d/as) - * = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1) - * = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad) - * - */ -static force_inline void -blend_hsl_hue (rgb_t *res, - const rgb_t *dest, float da, - const rgb_t *src, float sa) -{ - res->r = src->r * da; - res->g = src->g * da; - res->b = src->b * da; - - set_sat (res, get_sat (dest) * sa); - set_lum (res, sa * da, get_lum (dest) * sa); -} - -/* - * Saturation - * - * as * ad * B(s/as, d/ad) - * = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1) - * = set_lum (as * ad * set_sat (d/ad, SAT (s/as)), - * as * LUM (d), as * ad) - * = set_lum (set_sat (as * d, ad * SAT (s), as * LUM (d), as * ad)) - */ -static force_inline void -blend_hsl_saturation (rgb_t *res, - const rgb_t *dest, float da, - const rgb_t *src, float sa) -{ - res->r = dest->r * sa; - res->g = dest->g * sa; - res->b = dest->b * sa; - - set_sat (res, get_sat (src) * da); - set_lum (res, sa * da, get_lum (dest) * sa); -} - -/* - * Color - * - * as * ad * B(s/as, d/as) - * = as * ad * set_lum (s/as, LUM (d/ad), 1) - * = set_lum (s * ad, as * LUM (d), as * ad) - */ -static force_inline void -blend_hsl_color (rgb_t *res, - const rgb_t *dest, float da, - const rgb_t *src, float sa) -{ - res->r = src->r * da; - res->g = src->g * da; - res->b = src->b * da; - - set_lum (res, sa * da, get_lum (dest) * sa); -} - -/* - * Luminosity - * - * as * ad * B(s/as, d/ad) - * = as * ad * set_lum (d/ad, LUM (s/as), 1) - * = set_lum (as * d, ad * LUM (s), as * ad) - */ -static force_inline void -blend_hsl_luminosity (rgb_t *res, - const rgb_t *dest, float da, - const rgb_t *src, float sa) -{ - res->r = dest->r * sa; - res->g = dest->g * sa; - res->b = dest->b * sa; - - set_lum (res, sa * da, get_lum (src) * da); -} - -#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name) \ - static void \ - combine_ ## name ## _u_float (pixman_implementation_t *imp, \ - pixman_op_t op, \ - float *dest, \ - const float *src, \ - const float *mask, \ - int n_pixels) \ - { \ - int i; \ - \ - for (i = 0; i < 4 * n_pixels; i += 4) \ - { \ - float sa, da; \ - rgb_t sc, dc, rc; \ - \ - sa = src[i + 0]; \ - sc.r = src[i + 1]; \ - sc.g = src[i + 2]; \ - sc.b = src[i + 3]; \ - \ - da = dest[i + 0]; \ - dc.r = dest[i + 1]; \ - dc.g = dest[i + 2]; \ - dc.b = dest[i + 3]; \ - \ - if (mask) \ - { \ - float ma = mask[i + 0]; \ - \ - /* Component alpha is not supported for HSL modes */ \ - sa *= ma; \ - sc.r *= ma; \ - sc.g *= ma; \ - sc.g *= ma; \ - } \ - \ - blend_ ## name (&rc, &dc, da, &sc, sa); \ - \ - dest[i + 0] = sa + da - sa * da; \ - dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r; \ - dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g; \ - dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b; \ - } \ - } - -MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue) -MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation) -MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color) -MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity) - -void -_pixman_setup_combiner_functions_float (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float; - imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float; - imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float; - imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float; - imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float; - imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float; - imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float; - imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float; - imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float; - imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float; - imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float; - imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float; - imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float; - imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float; - - /* Disjoint, unified */ - imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float; - imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float; - - /* Conjoint, unified */ - imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float; - imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float; - - /* PDF operators, unified */ - imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float; - imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float; - imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float; - imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float; - imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float; - imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float; - imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float; - imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float; - imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float; - imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float; - imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float; - - imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float; - imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float; - imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float; - imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float; - - /* Component alpha combiners */ - imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float; - imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float; - imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float; - imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float; - imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float; - imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float; - imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float; - imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float; - imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float; - imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float; - - /* Disjoint CA */ - imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float; - - /* Conjoint CA */ - imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float; - - /* PDF operators CA */ - imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float; - imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float; - imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float; - imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float; - imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float; - imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float; - imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float; - imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float; - imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float; - imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float; - imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float; - - /* It is not clear that these make sense, so make them noops for now */ - imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float; - imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float; - imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float; - imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float; -} diff --git a/vendor/pixman/pixman/pixman-combine32.c b/vendor/pixman/pixman/pixman-combine32.c deleted file mode 100644 index de51f64e1..000000000 --- a/vendor/pixman/pixman/pixman-combine32.c +++ /dev/null @@ -1,1189 +0,0 @@ -/* - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include - -#include "pixman-private.h" -#include "pixman-combine32.h" - -/* component alpha helper functions */ - -static void -combine_mask_ca (uint32_t *src, uint32_t *mask) -{ - uint32_t a = *mask; - - uint32_t x; - uint16_t xa; - - if (!a) - { - *(src) = 0; - return; - } - - x = *(src); - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - xa = x >> A_SHIFT; - UN8x4_MUL_UN8x4 (x, a); - *(src) = x; - - UN8x4_MUL_UN8 (a, xa); - *(mask) = a; -} - -static void -combine_mask_value_ca (uint32_t *src, const uint32_t *mask) -{ - uint32_t a = *mask; - uint32_t x; - - if (!a) - { - *(src) = 0; - return; - } - - if (a == ~0) - return; - - x = *(src); - UN8x4_MUL_UN8x4 (x, a); - *(src) = x; -} - -static void -combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask) -{ - uint32_t a = *(mask); - uint32_t x; - - if (!a) - return; - - x = *(src) >> A_SHIFT; - if (x == MASK) - return; - - if (a == ~0) - { - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - UN8x4_MUL_UN8 (a, x); - *(mask) = a; -} - -/* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. The unified alpha version has a 'u' at the end of the name, - * the component version has a 'ca'. Similarly, functions which deal with - * this difference will have two versions using the same convention. - */ - -static force_inline uint32_t -combine_mask (const uint32_t *src, const uint32_t *mask, int i) -{ - uint32_t s, m; - - if (mask) - { - m = *(mask + i) >> A_SHIFT; - - if (!m) - return 0; - } - - s = *(src + i); - - if (mask) - UN8x4_MUL_UN8 (s, m); - - return s; -} - -static void -combine_clear (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - memset (dest, 0, width * sizeof (uint32_t)); -} - -static void -combine_dst (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - return; -} - -static void -combine_src_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - if (!mask) - { - memcpy (dest, src, width * sizeof (uint32_t)); - } - else - { - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - - *(dest + i) = s; - } - } -} - -static void -combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - if (!mask) - { - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t a = ALPHA_8 (s); - if (a == 0xFF) - { - *(dest + i) = s; - } - else if (s) - { - uint32_t d = *(dest + i); - uint32_t ia = a ^ 0xFF; - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *(dest + i) = d; - } - } - } - else - { - for (i = 0; i < width; ++i) - { - uint32_t m = ALPHA_8 (*(mask + i)); - if (m == 0xFF) - { - uint32_t s = *(src + i); - uint32_t a = ALPHA_8 (s); - if (a == 0xFF) - { - *(dest + i) = s; - } - else if (s) - { - uint32_t d = *(dest + i); - uint32_t ia = a ^ 0xFF; - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *(dest + i) = d; - } - } - else if (m) - { - uint32_t s = *(src + i); - if (s) - { - uint32_t d = *(dest + i); - UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s); - *(dest + i) = d; - } - } - } - } -} - -static void -combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t ia = ALPHA_8 (~*(dest + i)); - UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); - *(dest + i) = s; - } -} - -static void -combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t a = ALPHA_8 (*(dest + i)); - UN8x4_MUL_UN8 (s, a); - *(dest + i) = s; - } -} - -static void -combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t a = ALPHA_8 (s); - UN8x4_MUL_UN8 (d, a); - *(dest + i) = d; - } -} - -static void -combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t a = ALPHA_8 (~*(dest + i)); - UN8x4_MUL_UN8 (s, a); - *(dest + i) = s; - } -} - -static void -combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t a = ALPHA_8 (~s); - UN8x4_MUL_UN8 (d, a); - *(dest + i) = d; - } -} - -static void -combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t dest_a = ALPHA_8 (d); - uint32_t src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - *(dest + i) = s; - } -} - -static void -combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t src_a = ALPHA_8 (s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - *(dest + i) = s; - } -} - -static void -combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t src_ia = ALPHA_8 (~s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - *(dest + i) = s; - } -} - -static void -combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - UN8x4_ADD_UN8x4 (d, s); - *(dest + i) = d; - } -} - -/* - * PDF blend modes: - * - * The following blend modes have been taken from the PDF ISO 32000 - * specification, which at this point in time is available from - * - * http://www.adobe.com/devnet/pdf/pdf_reference.html - * - * The specific documents of interest are the PDF spec itself: - * - * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf - * - * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat - * 9.1 and Reader 9.1: - * - * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf - * - * that clarifies the specifications for blend modes ColorDodge and - * ColorBurn. - * - * The formula for computing the final pixel color given in 11.3.6 is: - * - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * - * with B() is the blend function. When B(Cb, Cs) = Cs, this formula - * reduces to the regular OVER operator. - * - * Cs and Cb are not premultiplied, so in our implementation we instead - * use: - * - * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) - * - * where cr, cs, and cb are premultiplied colors, and where the - * - * αb × αs × B(cb/αb, cs/αs) - * - * part is first arithmetically simplified under the assumption that αb - * and αs are not 0, and then updated to produce a meaningful result when - * they are. - * - * For all the blend mode operators, the alpha channel is given by - * - * αr = αs + αb + αb × αs - */ - -/* - * Multiply - * - * ad * as * B(d / ad, s / as) - * = ad * as * d/ad * s/as - * = d * s - * - */ -static void -combine_multiply_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t ss = s; - uint32_t src_ia = ALPHA_8 (~s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia); - UN8x4_MUL_UN8x4 (d, s); - UN8x4_ADD_UN8x4 (d, ss); - - *(dest + i) = d; - } -} - -static void -combine_multiply_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t m = *(mask + i); - uint32_t s = *(src + i); - uint32_t d = *(dest + i); - uint32_t r = d; - uint32_t dest_ia = ALPHA_8 (~d); - - combine_mask_ca (&s, &m); - - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia); - UN8x4_MUL_UN8x4 (d, s); - UN8x4_ADD_UN8x4 (r, d); - - *(dest + i) = r; - } -} - -#define CLAMP(v, low, high) \ - do \ - { \ - if (v < (low)) \ - v = (low); \ - if (v > (high)) \ - v = (high); \ - } while (0) - -#define PDF_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - uint32_t s = combine_mask (src, mask, i); \ - uint32_t d = *(dest + i); \ - uint8_t sa = ALPHA_8 (s); \ - uint8_t isa = ~sa; \ - uint8_t da = ALPHA_8 (d); \ - uint8_t ida = ~da; \ - uint32_t ra, rr, rg, rb; \ - \ - ra = da * 0xff + sa * 0xff - sa * da; \ - rr = isa * RED_8 (d) + ida * RED_8 (s); \ - rg = isa * GREEN_8 (d) + ida * GREEN_8 (s); \ - rb = isa * BLUE_8 (d) + ida * BLUE_8 (s); \ - \ - rr += blend_ ## name (RED_8 (d), da, RED_8 (s), sa); \ - rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa); \ - rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa); \ - \ - CLAMP (ra, 0, 255 * 255); \ - CLAMP (rr, 0, 255 * 255); \ - CLAMP (rg, 0, 255 * 255); \ - CLAMP (rb, 0, 255 * 255); \ - \ - ra = DIV_ONE_UN8 (ra); \ - rr = DIV_ONE_UN8 (rr); \ - rg = DIV_ONE_UN8 (rg); \ - rb = DIV_ONE_UN8 (rb); \ - \ - *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \ - } \ - } \ - \ - static void \ - combine_ ## name ## _ca (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - uint32_t m = *(mask + i); \ - uint32_t s = *(src + i); \ - uint32_t d = *(dest + i); \ - uint8_t da = ALPHA_8 (d); \ - uint8_t ida = ~da; \ - uint32_t ra, rr, rg, rb; \ - uint8_t ira, iga, iba; \ - \ - combine_mask_ca (&s, &m); \ - \ - ira = ~RED_8 (m); \ - iga = ~GREEN_8 (m); \ - iba = ~BLUE_8 (m); \ - \ - ra = da * 0xff + ALPHA_8 (s) * 0xff - ALPHA_8 (s) * da; \ - rr = ira * RED_8 (d) + ida * RED_8 (s); \ - rg = iga * GREEN_8 (d) + ida * GREEN_8 (s); \ - rb = iba * BLUE_8 (d) + ida * BLUE_8 (s); \ - \ - rr += blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)); \ - rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \ - rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \ - \ - CLAMP (ra, 0, 255 * 255); \ - CLAMP (rr, 0, 255 * 255); \ - CLAMP (rg, 0, 255 * 255); \ - CLAMP (rb, 0, 255 * 255); \ - \ - ra = DIV_ONE_UN8 (ra); \ - rr = DIV_ONE_UN8 (rr); \ - rg = DIV_ONE_UN8 (rg); \ - rb = DIV_ONE_UN8 (rb); \ - \ - *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \ - } \ - } - -/* - * Screen - * - * ad * as * B(d/ad, s/as) - * = ad * as * (d/ad + s/as - s/as * d/ad) - * = ad * s + as * d - s * d - */ -static inline int32_t -blend_screen (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - return s * ad + d * as - s * d; -} - -PDF_SEPARABLE_BLEND_MODE (screen) - -/* - * Overlay - * - * ad * as * B(d/ad, s/as) - * = ad * as * Hardlight (s, d) - * = if (d / ad < 0.5) - * as * ad * Multiply (s/as, 2 * d/ad) - * else - * as * ad * Screen (s/as, 2 * d / ad - 1) - * = if (d < 0.5 * ad) - * as * ad * s/as * 2 * d /ad - * else - * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) - * = if (2 * d < ad) - * 2 * s * d - * else - * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) - * = if (2 * d < ad) - * 2 * s * d - * else - * as * ad - 2 * (ad - d) * (as - s) - */ -static inline int32_t -blend_overlay (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - uint32_t r; - - if (2 * d < ad) - r = 2 * s * d; - else - r = as * ad - 2 * (ad - d) * (as - s); - - return r; -} - -PDF_SEPARABLE_BLEND_MODE (overlay) - -/* - * Darken - * - * ad * as * B(d/ad, s/as) - * = ad * as * MIN(d/ad, s/as) - * = MIN (as * d, ad * s) - */ -static inline int32_t -blend_darken (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - s = ad * s; - d = as * d; - - return s > d ? d : s; -} - -PDF_SEPARABLE_BLEND_MODE (darken) - -/* - * Lighten - * - * ad * as * B(d/ad, s/as) - * = ad * as * MAX(d/ad, s/as) - * = MAX (as * d, ad * s) - */ -static inline int32_t -blend_lighten (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - s = ad * s; - d = as * d; - - return s > d ? s : d; -} - -PDF_SEPARABLE_BLEND_MODE (lighten) - -/* - * Hard light - * - * ad * as * B(d/ad, s/as) - * = if (s/as <= 0.5) - * ad * as * Multiply (d/ad, 2 * s/as) - * else - * ad * as * Screen (d/ad, 2 * s/as - 1) - * = if 2 * s <= as - * ad * as * d/ad * 2 * s / as - * else - * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1)) - * = if 2 * s <= as - * 2 * s * d - * else - * as * ad - 2 * (ad - d) * (as - s) - */ -static inline int32_t -blend_hard_light (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - if (2 * s < as) - return 2 * s * d; - else - return as * ad - 2 * (ad - d) * (as - s); -} - -PDF_SEPARABLE_BLEND_MODE (hard_light) - -/* - * Difference - * - * ad * as * B(s/as, d/ad) - * = ad * as * abs (s/as - d/ad) - * = if (s/as <= d/ad) - * ad * as * (d/ad - s/as) - * else - * ad * as * (s/as - d/ad) - * = if (ad * s <= as * d) - * as * d - ad * s - * else - * ad * s - as * d - */ -static inline int32_t -blend_difference (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - int32_t das = d * as; - int32_t sad = s * ad; - - if (sad < das) - return das - sad; - else - return sad - das; -} - -PDF_SEPARABLE_BLEND_MODE (difference) - -/* - * Exclusion - * - * ad * as * B(s/as, d/ad) - * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) - * = as * d + ad * s - 2 * s * d - */ - -/* This can be made faster by writing it directly and not using - * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ - -static inline int32_t -blend_exclusion (int32_t d, int32_t ad, int32_t s, int32_t as) -{ - return s * ad + d * as - 2 * d * s; -} - -PDF_SEPARABLE_BLEND_MODE (exclusion) - -#undef PDF_SEPARABLE_BLEND_MODE - -/* Component alpha combiners */ - -static void -combine_clear_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(uint32_t)); -} - -static void -combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - - combine_mask_value_ca (&s, &m); - - *(dest + i) = s; - } -} - -static void -combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t a; - - combine_mask_ca (&s, &m); - - a = ~m; - if (a) - { - uint32_t d = *(dest + i); - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s); - s = d; - } - - *(dest + i) = s; - } -} - -static void -combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint32_t a = ~d >> A_SHIFT; - - if (a) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - - UN8x4_MUL_UN8x4 (s, m); - UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d); - - *(dest + i) = s; - } - } -} - -static void -combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint16_t a = d >> A_SHIFT; - uint32_t s = 0; - - if (a) - { - uint32_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN8x4_MUL_UN8 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t a; - - combine_mask_alpha_ca (&s, &m); - - a = m; - if (a != ~0) - { - uint32_t d = 0; - - if (a) - { - d = *(dest + i); - UN8x4_MUL_UN8x4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint16_t a = ~d >> A_SHIFT; - uint32_t s = 0; - - if (a) - { - uint32_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN8x4_MUL_UN8 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t a; - - combine_mask_alpha_ca (&s, &m); - - a = ~m; - if (a != ~0) - { - uint32_t d = 0; - - if (a) - { - d = *(dest + i); - UN8x4_MUL_UN8x4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t ad; - uint16_t as = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t ad; - uint16_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = m; - - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t d = *(dest + i); - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t ad; - uint16_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = *(src + i); - uint32_t m = *(mask + i); - uint32_t d = *(dest + i); - - combine_mask_value_ca (&s, &m); - - UN8x4_ADD_UN8x4 (d, s); - - *(dest + i) = d; - } -} - -void -_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear; - imp->combine_32[PIXMAN_OP_SRC] = combine_src_u; - imp->combine_32[PIXMAN_OP_DST] = combine_dst; - imp->combine_32[PIXMAN_OP_OVER] = combine_over_u; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; - imp->combine_32[PIXMAN_OP_IN] = combine_in_u; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; - imp->combine_32[PIXMAN_OP_OUT] = combine_out_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; - imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; - imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u; - imp->combine_32[PIXMAN_OP_ADD] = combine_add_u; - - imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u; - imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u; - imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u; - imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u; - imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u; - imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; - imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u; - imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; - - /* Component alpha combiners */ - imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; - imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca; - /* dest */ - imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca; - imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca; - - imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; - imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; - imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; - imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; - imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; - imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; - imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; - imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; -} diff --git a/vendor/pixman/pixman/pixman-combine32.h b/vendor/pixman/pixman/pixman-combine32.h deleted file mode 100644 index 59bb2477a..000000000 --- a/vendor/pixman/pixman/pixman-combine32.h +++ /dev/null @@ -1,272 +0,0 @@ -#define COMPONENT_SIZE 8 -#define MASK 0xff -#define ONE_HALF 0x80 - -#define A_SHIFT 8 * 3 -#define R_SHIFT 8 * 2 -#define G_SHIFT 8 -#define A_MASK 0xff000000 -#define R_MASK 0xff0000 -#define G_MASK 0xff00 - -#define RB_MASK 0xff00ff -#define AG_MASK 0xff00ff00 -#define RB_ONE_HALF 0x800080 -#define RB_MASK_PLUS_ONE 0x1000100 - -#define ALPHA_8(x) ((x) >> A_SHIFT) -#define RED_8(x) (((x) >> R_SHIFT) & MASK) -#define GREEN_8(x) (((x) >> G_SHIFT) & MASK) -#define BLUE_8(x) ((x) & MASK) - -/* - * ARMv6 has UQADD8 instruction, which implements unsigned saturated - * addition for 8-bit values packed in 32-bit registers. It is very useful - * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would - * otherwise need a lot of arithmetic operations to simulate this operation). - * Since most of the major ARM linux distros are built for ARMv7, we are - * much less dependent on runtime CPU detection and can get practical - * benefits from conditional compilation here for a lot of users. - */ - -#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \ - !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__)) -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) - -static force_inline uint32_t -un8x4_add_un8x4 (uint32_t x, uint32_t y) -{ - uint32_t t; - asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y)); - return t; -} - -#define UN8x4_ADD_UN8x4(x, y) \ - ((x) = un8x4_add_un8x4 ((x), (y))) - -#define UN8_rb_ADD_UN8_rb(x, y, t) \ - ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t)) - -#define ADD_UN8(x, y, t) \ - ((t) = (x), un8x4_add_un8x4 ((t), (y))) - -#endif -#endif - -/*****************************************************************************/ - -/* - * Helper macros. - */ - -#define MUL_UN8(a, b, t) \ - ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) - -#define DIV_UN8(a, b) \ - (((uint16_t) (a) * MASK + ((b) / 2)) / (b)) - -#ifndef ADD_UN8 -#define ADD_UN8(x, y, t) \ - ((t) = (x) + (y), \ - (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) -#endif - -#define DIV_ONE_UN8(x) \ - (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) - -/* - * The methods below use some tricks to be able to do two color - * components at the same time. - */ - -/* - * x_rb = (x_rb * a) / 255 - */ -#define UN8_rb_MUL_UN8(x, a, t) \ - do \ - { \ - t = ((x) & RB_MASK) * (a); \ - t += RB_ONE_HALF; \ - x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x &= RB_MASK; \ - } while (0) - -/* - * x_rb = min (x_rb + y_rb, 255) - */ -#ifndef UN8_rb_ADD_UN8_rb -#define UN8_rb_ADD_UN8_rb(x, y, t) \ - do \ - { \ - t = ((x) + (y)); \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - x = (t & RB_MASK); \ - } while (0) -#endif - -/* - * x_rb = (x_rb * a_rb) / 255 - */ -#define UN8_rb_MUL_UN8_rb(x, a, t) \ - do \ - { \ - t = (x & MASK) * (a & MASK); \ - t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - t += RB_ONE_HALF; \ - t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x = t & RB_MASK; \ - } while (0) - -/* - * x_c = (x_c * a) / 255 - */ -#define UN8x4_MUL_UN8(x, a) \ - do \ - { \ - uint32_t r1__, r2__, t__; \ - \ - r1__ = (x); \ - UN8_rb_MUL_UN8 (r1__, (a), t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - UN8_rb_MUL_UN8 (r2__, (a), t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a) / 255 + y_c - */ -#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y) & RB_MASK; \ - UN8_rb_MUL_UN8 (r1__, (a), t__); \ - UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN8_rb_MUL_UN8 (r2__, (a), t__); \ - UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a + y_c * b) / 255 - */ -#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y); \ - UN8_rb_MUL_UN8 (r1__, (a), t__); \ - UN8_rb_MUL_UN8 (r2__, (b), t__); \ - UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((y) >> G_SHIFT); \ - UN8_rb_MUL_UN8 (r2__, (a), t__); \ - UN8_rb_MUL_UN8 (r3__, (b), t__); \ - UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 - */ -#define UN8x4_MUL_UN8x4(x, a) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 + y_c - */ -#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ - r2__ = (y) & RB_MASK; \ - UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((a) >> G_SHIFT); \ - UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c + y_c * b) / 255 - */ -#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \ - r2__ = (y); \ - UN8_rb_MUL_UN8 (r2__, (b), t__); \ - UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \ - r3__ = (y) >> G_SHIFT; \ - UN8_rb_MUL_UN8 (r3__, (b), t__); \ - UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - x_c = min(x_c + y_c, 255) -*/ -#ifndef UN8x4_ADD_UN8x4 -#define UN8x4_ADD_UN8x4(x, y) \ - do \ - { \ - uint32_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x) & RB_MASK; \ - r2__ = (y) & RB_MASK; \ - UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT) & RB_MASK; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) -#endif diff --git a/vendor/pixman/pixman/pixman-compiler.h b/vendor/pixman/pixman/pixman-compiler.h deleted file mode 100644 index 639415693..000000000 --- a/vendor/pixman/pixman/pixman-compiler.h +++ /dev/null @@ -1,234 +0,0 @@ -/* Pixman uses some non-standard compiler features. This file ensures - * they exist - * - * The features are: - * - * FUNC must be defined to expand to the current function - * PIXMAN_EXPORT should be defined to whatever is required to - * export functions from a shared library - * limits limits for various types must be defined - * inline must be defined - * force_inline must be defined - */ -#if defined (__GNUC__) -# define FUNC ((const char*) (__PRETTY_FUNCTION__)) -#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) -# define FUNC ((const char*) (__func__)) -#else -# define FUNC ((const char*) ("???")) -#endif - -#if defined (__GNUC__) -# define unlikely(expr) __builtin_expect ((expr), 0) -#else -# define unlikely(expr) (expr) -#endif - -#if defined (__GNUC__) -# define MAYBE_UNUSED __attribute__((unused)) -#else -# define MAYBE_UNUSED -#endif - -#ifndef INT16_MIN -# define INT16_MIN (-32767-1) -#endif - -#ifndef INT16_MAX -# define INT16_MAX (32767) -#endif - -#ifndef INT32_MIN -# define INT32_MIN (-2147483647-1) -#endif - -#ifndef INT32_MAX -# define INT32_MAX (2147483647) -#endif - -#ifndef UINT32_MIN -# define UINT32_MIN (0) -#endif - -#ifndef UINT32_MAX -# define UINT32_MAX (4294967295U) -#endif - -#ifndef INT64_MIN -# define INT64_MIN (-9223372036854775807-1) -#endif - -#ifndef INT64_MAX -# define INT64_MAX (9223372036854775807) -#endif - -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t)-1) -#endif - - -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - -#ifdef _MSC_VER -/* 'inline' is available only in C++ in MSVC */ -# define inline __inline -# define force_inline __forceinline -# define noinline __declspec(noinline) -#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) -# define inline __inline__ -# define force_inline __inline__ __attribute__ ((__always_inline__)) -# define noinline __attribute__((noinline)) -#else -# ifndef force_inline -# define force_inline inline -# endif -# ifndef noinline -# define noinline -# endif -#endif - -/* GCC visibility */ -#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32) -# define PIXMAN_EXPORT __attribute__ ((visibility("default"))) -/* Sun Studio 8 visibility */ -#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) -# define PIXMAN_EXPORT __global -#elif defined (_MSC_VER) || defined(__MINGW32__) -# define PIXMAN_EXPORT PIXMAN_API -#else -# define PIXMAN_EXPORT -#endif - -/* member offsets */ -#define CONTAINER_OF(type, member, data) \ - ((type *)(((uint8_t *)data) - offsetof (type, member))) - -/* TLS */ -#if defined(PIXMAN_NO_TLS) - -# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static type name; -# define PIXMAN_GET_THREAD_LOCAL(name) \ - (&name) - -#elif defined(TLS) - -# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static TLS type name; -# define PIXMAN_GET_THREAD_LOCAL(name) \ - (&name) - -#elif defined(__MINGW32__) - -# define _NO_W32_PSEUDO_MODIFIERS -# include - -# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static volatile int tls_ ## name ## _initialized = 0; \ - static void *tls_ ## name ## _mutex = NULL; \ - static unsigned tls_ ## name ## _index; \ - \ - static type * \ - tls_ ## name ## _alloc (void) \ - { \ - type *value = calloc (1, sizeof (type)); \ - if (value) \ - TlsSetValue (tls_ ## name ## _index, value); \ - return value; \ - } \ - \ - static force_inline type * \ - tls_ ## name ## _get (void) \ - { \ - type *value; \ - if (!tls_ ## name ## _initialized) \ - { \ - if (!tls_ ## name ## _mutex) \ - { \ - void *mutex = CreateMutexA (NULL, 0, NULL); \ - if (InterlockedCompareExchangePointer ( \ - &tls_ ## name ## _mutex, mutex, NULL) != NULL) \ - { \ - CloseHandle (mutex); \ - } \ - } \ - WaitForSingleObject (tls_ ## name ## _mutex, 0xFFFFFFFF); \ - if (!tls_ ## name ## _initialized) \ - { \ - tls_ ## name ## _index = TlsAlloc (); \ - tls_ ## name ## _initialized = 1; \ - } \ - ReleaseMutex (tls_ ## name ## _mutex); \ - } \ - if (tls_ ## name ## _index == 0xFFFFFFFF) \ - return NULL; \ - value = TlsGetValue (tls_ ## name ## _index); \ - if (!value) \ - value = tls_ ## name ## _alloc (); \ - return value; \ - } - -# define PIXMAN_GET_THREAD_LOCAL(name) \ - tls_ ## name ## _get () - -#elif defined(_MSC_VER) - -# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static __declspec(thread) type name; -# define PIXMAN_GET_THREAD_LOCAL(name) \ - (&name) - -#elif defined(HAVE_PTHREADS) - -#include - -# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \ - static pthread_key_t tls_ ## name ## _key; \ - \ - static void \ - tls_ ## name ## _destroy_value (void *value) \ - { \ - free (value); \ - } \ - \ - static void \ - tls_ ## name ## _make_key (void) \ - { \ - pthread_key_create (&tls_ ## name ## _key, \ - tls_ ## name ## _destroy_value); \ - } \ - \ - static type * \ - tls_ ## name ## _alloc (void) \ - { \ - type *value = calloc (1, sizeof (type)); \ - if (value) \ - pthread_setspecific (tls_ ## name ## _key, value); \ - return value; \ - } \ - \ - static force_inline type * \ - tls_ ## name ## _get (void) \ - { \ - type *value = NULL; \ - if (pthread_once (&tls_ ## name ## _once_control, \ - tls_ ## name ## _make_key) == 0) \ - { \ - value = pthread_getspecific (tls_ ## name ## _key); \ - if (!value) \ - value = tls_ ## name ## _alloc (); \ - } \ - return value; \ - } - -# define PIXMAN_GET_THREAD_LOCAL(name) \ - tls_ ## name ## _get () - -#else - -# error "Unknown thread local support for this system. Pixman will not work with multiple threads. Define PIXMAN_NO_TLS to acknowledge and accept this limitation and compile pixman without thread-safety support." - -#endif diff --git a/vendor/pixman/pixman/pixman-conical-gradient.c b/vendor/pixman/pixman/pixman-conical-gradient.c deleted file mode 100644 index 37dfffd73..000000000 --- a/vendor/pixman/pixman/pixman-conical-gradient.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -static force_inline double -coordinates_to_parameter (double x, double y, double angle) -{ - double t; - - t = atan2 (y, x) + angle; - - while (t < 0) - t += 2 * M_PI; - - while (t >= 2 * M_PI) - t -= 2 * M_PI; - - return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and - * make rotation CCW - */ -} - -static uint32_t * -conical_get_scanline (pixman_iter_t *iter, - const uint32_t *mask, - int Bpp, - pixman_gradient_walker_write_t write_pixel) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t *buffer = iter->buffer; - - gradient_t *gradient = (gradient_t *)image; - conical_gradient_t *conical = (conical_gradient_t *)image; - uint32_t *end = buffer + width * (Bpp / 4); - pixman_gradient_walker_t walker; - pixman_bool_t affine = TRUE; - double cx = 1.; - double cy = 0.; - double cz = 0.; - double rx = x + 0.5; - double ry = y + 0.5; - double rz = 1.; - - _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - - if (image->common.transform) - { - pixman_vector_t v; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - cx = image->common.transform->matrix[0][0] / 65536.; - cy = image->common.transform->matrix[1][0] / 65536.; - cz = image->common.transform->matrix[2][0] / 65536.; - - rx = v.vector[0] / 65536.; - ry = v.vector[1] / 65536.; - rz = v.vector[2] / 65536.; - - affine = - image->common.transform->matrix[2][0] == 0 && - v.vector[2] == pixman_fixed_1; - } - - if (affine) - { - rx -= conical->center.x / 65536.; - ry -= conical->center.y / 65536.; - - while (buffer < end) - { - if (!mask || *mask++) - { - double t = coordinates_to_parameter (rx, ry, conical->angle); - - write_pixel (&walker, - (pixman_fixed_48_16_t)pixman_double_to_fixed (t), - buffer); - } - - buffer += (Bpp / 4); - - rx += cx; - ry += cy; - } - } - else - { - while (buffer < end) - { - double x, y; - - if (!mask || *mask++) - { - double t; - - if (rz != 0) - { - x = rx / rz; - y = ry / rz; - } - else - { - x = y = 0.; - } - - x -= conical->center.x / 65536.; - y -= conical->center.y / 65536.; - - t = coordinates_to_parameter (x, y, conical->angle); - - write_pixel (&walker, - (pixman_fixed_48_16_t)pixman_double_to_fixed (t), - buffer); - } - - buffer += (Bpp / 4); - - rx += cx; - ry += cy; - rz += cz; - } - } - - iter->y++; - return iter->buffer; -} - -static uint32_t * -conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) -{ - return conical_get_scanline (iter, mask, 4, - _pixman_gradient_walker_write_narrow); -} - -static uint32_t * -conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - return conical_get_scanline (iter, NULL, 16, - _pixman_gradient_walker_write_wide); -} - -void -_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->iter_flags & ITER_NARROW) - iter->get_scanline = conical_get_scanline_narrow; - else - iter->get_scanline = conical_get_scanline_wide; -} - -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_conical_gradient (const pixman_point_fixed_t * center, - pixman_fixed_t angle, - const pixman_gradient_stop_t *stops, - int n_stops) -{ - pixman_image_t *image = _pixman_image_allocate (); - conical_gradient_t *conical; - - if (!image) - return NULL; - - conical = &image->conical; - - if (!_pixman_init_gradient (&conical->common, stops, n_stops)) - { - free (image); - return NULL; - } - - angle = MOD (angle, pixman_int_to_fixed (360)); - - image->type = CONICAL; - - conical->center = *center; - conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI; - - return image; -} - diff --git a/vendor/pixman/pixman/pixman-edge-accessors.c b/vendor/pixman/pixman/pixman-edge-accessors.c deleted file mode 100644 index ea3a31e2f..000000000 --- a/vendor/pixman/pixman/pixman-edge-accessors.c +++ /dev/null @@ -1,4 +0,0 @@ - -#define PIXMAN_FB_ACCESSORS - -#include "pixman-edge.c" diff --git a/vendor/pixman/pixman/pixman-edge-imp.h b/vendor/pixman/pixman/pixman-edge-imp.h deleted file mode 100644 index a4698eddb..000000000 --- a/vendor/pixman/pixman/pixman-edge-imp.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef rasterize_span -#endif - -static void -RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - pixman_fixed_t lx; - pixman_fixed_t rx; - int lxi; - int rxi; - - lx = l->x; - rx = r->x; -#if N_BITS == 1 - /* For the non-antialiased case, round the coordinates up, in effect - * sampling just slightly to the left of the pixel. This is so that - * when the sample point lies exactly on the line, we round towards - * north-west. - * - * (The AA case does a similar adjustment in RENDER_SAMPLES_X) - */ - lx += X_FRAC_FIRST(1) - pixman_fixed_e; - rx += X_FRAC_FIRST(1) - pixman_fixed_e; -#endif - /* clip X */ - if (lx < 0) - lx = 0; - if (pixman_fixed_to_int (rx) >= width) -#if N_BITS == 1 - rx = pixman_int_to_fixed (width); -#else - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; -#endif - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - - /* Find pixel bounds for span */ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - -#if N_BITS == 1 - { - -#define LEFT_MASK(x) \ - (((x) & 0x1f) ? \ - SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) -#define RIGHT_MASK(x) \ - (((32 - (x)) & 0x1f) ? \ - SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) - -#define MASK_BITS(x,w,l,n,r) { \ - n = (w); \ - r = RIGHT_MASK ((x) + n); \ - l = LEFT_MASK (x); \ - if (l) { \ - n -= 32 - ((x) & 0x1f); \ - if (n < 0) { \ - n = 0; \ - l &= r; \ - r = 0; \ - } \ - } \ - n >>= 5; \ - } - - uint32_t *a = line; - uint32_t startmask; - uint32_t endmask; - int nmiddle; - int width = rxi - lxi; - int x = lxi; - - a += x >> 5; - x &= 0x1f; - - MASK_BITS (x, width, startmask, nmiddle, endmask); - - if (startmask) { - WRITE(image, a, READ(image, a) | startmask); - a++; - } - while (nmiddle--) - WRITE(image, a++, 0xffffffff); - if (endmask) - WRITE(image, a, READ(image, a) | endmask); - } -#else - { - DEFINE_ALPHA(line,lxi); - int lxs; - int rxs; - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, N_BITS); - rxs = RENDER_SAMPLES_X (rx, N_BITS); - - /* Add coverage across row */ - if (lxi == rxi) - { - ADD_ALPHA (rxs - lxs); - } - else - { - int xi; - - ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); - STEP_ALPHA; - for (xi = lxi + 1; xi < rxi; xi++) - { - ADD_ALPHA (N_X_FRAC(N_BITS)); - STEP_ALPHA; - } - ADD_ALPHA (rxs); - } - } -#endif - } - - if (y == b) - break; - -#if N_BITS > 1 - if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL(N_BITS); - } - else -#endif - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG(N_BITS); - line += stride; - } - } -} - -#undef rasterize_span diff --git a/vendor/pixman/pixman/pixman-edge.c b/vendor/pixman/pixman/pixman-edge.c deleted file mode 100644 index c324cd3d4..000000000 --- a/vendor/pixman/pixman/pixman-edge.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include - -#include "pixman-private.h" -#include "pixman-accessor.h" - -/* - * Step across a small sample grid gap - */ -#define RENDER_EDGE_STEP_SMALL(edge) \ - { \ - edge->x += edge->stepx_small; \ - edge->e += edge->dx_small; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -/* - * Step across a large sample grid gap - */ -#define RENDER_EDGE_STEP_BIG(edge) \ - { \ - edge->x += edge->stepx_big; \ - edge->e += edge->dx_big; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -#ifdef PIXMAN_FB_ACCESSORS -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors -#else -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors -#endif - -/* - * 4 bit alpha - */ - -#define N_BITS 4 -#define RASTERIZE_EDGES rasterize_edges_4 - -#ifndef WORDS_BIGENDIAN -#define SHIFT_4(o) ((o) << 2) -#else -#define SHIFT_4(o) ((1 - (o)) << 2) -#endif - -#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) -#define PUT_4(x, o, v) \ - (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o))) - -#define DEFINE_ALPHA(line, x) \ - uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ - int __ao = (x) & 1 - -#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) - -#define ADD_ALPHA(a) \ - { \ - uint8_t __o = READ (image, __ap); \ - uint8_t __a = (a) + GET_4 (__o, __ao); \ - WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ - } - -#include "pixman-edge-imp.h" - -#undef ADD_ALPHA -#undef STEP_ALPHA -#undef DEFINE_ALPHA -#undef RASTERIZE_EDGES -#undef N_BITS - - -/* - * 1 bit alpha - */ - -#define N_BITS 1 -#define RASTERIZE_EDGES rasterize_edges_1 - -#include "pixman-edge-imp.h" - -#undef RASTERIZE_EDGES -#undef N_BITS - -/* - * 8 bit alpha - */ - -static force_inline uint8_t -clip255 (int x) -{ - if (x > 255) - return 255; - - return x; -} - -#define ADD_SATURATE_8(buf, val, length) \ - do \ - { \ - int i__ = (length); \ - uint8_t *buf__ = (buf); \ - int val__ = (val); \ - \ - while (i__--) \ - { \ - WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ - (buf__)++; \ - } \ - } while (0) - -/* - * We want to detect the case where we add the same value to a long - * span of pixels. The triangles on the end are filled in while we - * count how many sub-pixel scanlines contribute to the middle section. - * - * +--------------------------+ - * fill_height =| \ / - * +------------------+ - * |================| - * fill_start fill_end - */ -static void -rasterize_edges_8 (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - int fill_start = -1, fill_end = -1; - int fill_size = 0; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - uint8_t *ap = (uint8_t *) line; - pixman_fixed_t lx, rx; - int lxi, rxi; - - /* clip X */ - lx = l->x; - if (lx < 0) - lx = 0; - - rx = r->x; - - if (pixman_fixed_to_int (rx) >= width) - { - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; - } - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - int lxs, rxs; - - /* Find pixel bounds for span. */ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, 8); - rxs = RENDER_SAMPLES_X (rx, 8); - - /* Add coverage across row */ - if (lxi == rxi) - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + rxs - lxs)); - } - else - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); - - /* Move forward so that lxi/rxi is the pixel span */ - lxi++; - - /* Don't bother trying to optimize the fill unless - * the span is longer than 4 pixels. */ - if (rxi - lxi > 4) - { - if (fill_start < 0) - { - fill_start = lxi; - fill_end = rxi; - fill_size++; - } - else - { - if (lxi >= fill_end || rxi < fill_start) - { - /* We're beyond what we saved, just fill it */ - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - fill_end - fill_start); - fill_start = lxi; - fill_end = rxi; - fill_size = 1; - } - else - { - /* Update fill_start */ - if (lxi > fill_start) - { - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - lxi - fill_start); - fill_start = lxi; - } - else if (lxi < fill_start) - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), - fill_start - lxi); - } - - /* Update fill_end */ - if (rxi < fill_end) - { - ADD_SATURATE_8 (ap + rxi, - fill_size * N_X_FRAC (8), - fill_end - rxi); - fill_end = rxi; - } - else if (fill_end < rxi) - { - ADD_SATURATE_8 (ap + fill_end, - N_X_FRAC (8), - rxi - fill_end); - } - fill_size++; - } - } - } - else - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); - } - - WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); - } - } - - if (y == b) - { - /* We're done, make sure we clean up any remaining fill. */ - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - } - break; - } - - if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL (8); - } - else - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG (8); - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - - fill_start = fill_end = -1; - fill_size = 0; - } - - line += stride; - } - } -} - -#ifndef PIXMAN_FB_ACCESSORS -static -#endif -void -PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - switch (PIXMAN_FORMAT_BPP (image->bits.format)) - { - case 1: - rasterize_edges_1 (image, l, r, t, b); - break; - - case 4: - rasterize_edges_4 (image, l, r, t, b); - break; - - case 8: - rasterize_edges_8 (image, l, r, t, b); - break; - - default: - break; - } -} - -#ifndef PIXMAN_FB_ACCESSORS - -PIXMAN_EXPORT void -pixman_rasterize_edges (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - return_if_fail (image->type == BITS); - return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A); - - if (image->bits.read_func || image->bits.write_func) - pixman_rasterize_edges_accessors (image, l, r, t, b); - else - pixman_rasterize_edges_no_accessors (image, l, r, t, b); -} - -#endif diff --git a/vendor/pixman/pixman/pixman-fast-path.c b/vendor/pixman/pixman/pixman-fast-path.c deleted file mode 100644 index 4579fce99..000000000 --- a/vendor/pixman/pixman/pixman-fast-path.c +++ /dev/null @@ -1,3298 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" - -static force_inline uint32_t -fetch_24 (uint8_t *a) -{ - if (((uintptr_t)a) & 1) - { -#ifdef WORDS_BIGENDIAN - return (*a << 16) | (*(uint16_t *)(a + 1)); -#else - return *a | (*(uint16_t *)(a + 1) << 8); -#endif - } - else - { -#ifdef WORDS_BIGENDIAN - return (*(uint16_t *)a << 8) | *(a + 2); -#else - return *(uint16_t *)a | (*(a + 2) << 16); -#endif - } -} - -static force_inline void -store_24 (uint8_t *a, - uint32_t v) -{ - if (((uintptr_t)a) & 1) - { -#ifdef WORDS_BIGENDIAN - *a = (uint8_t) (v >> 16); - *(uint16_t *)(a + 1) = (uint16_t) (v); -#else - *a = (uint8_t) (v); - *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); -#endif - } - else - { -#ifdef WORDS_BIGENDIAN - *(uint16_t *)a = (uint16_t)(v >> 8); - *(a + 2) = (uint8_t)v; -#else - *(uint16_t *)a = (uint16_t)v; - *(a + 2) = (uint8_t)(v >> 16); -#endif - } -} - -static force_inline uint32_t -over (uint32_t src, - uint32_t dest) -{ - uint32_t a = ~src >> 24; - - UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); - - return dest; -} - -static force_inline uint32_t -in (uint32_t x, - uint8_t y) -{ - uint16_t a = y; - - UN8x4_MUL_UN8 (x, a); - - return x; -} - -/* - * Naming convention: - * - * op_src_mask_dest - */ -static void -fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *src, *src_line; - uint32_t *dst, *dst_line; - uint8_t *mask, *mask_line; - int src_stride, mask_stride, dst_stride; - uint8_t m; - uint32_t s, d; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - while (w--) - { - m = *mask++; - if (m) - { - s = *src | 0xff000000; - - if (m == 0xff) - { - *dst = s; - } - else - { - d = in (s, m); - *dst = over (d, *dst); - } - } - src++; - dst++; - } - } -} - -static void -fast_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - uint16_t t; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - if (srca == 0xff) - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - - if (m == 0) - *dst = 0; - else if (m != 0xff) - *dst = MUL_UN8 (m, *dst, t); - - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - m = MUL_UN8 (m, srca, t); - - if (m == 0) - *dst = 0; - else if (m != 0xff) - *dst = MUL_UN8 (m, *dst, t); - - dst++; - } - } - } -} - -static void -fast_composite_in_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint8_t s; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - - if (s == 0) - *dst = 0; - else if (s != 0xff) - *dst = MUL_UN8 (s, *dst, t); - - dst++; - } - } -} - -static void -fast_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst, d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - *dst = src; - else - *dst = over (src, *dst); - } - else if (m) - { - d = in (src, m); - *dst = over (d, *dst); - } - dst++; - } - } -} - -static void -fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, s; - uint32_t *dst_line, *dst, d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - - if (ma) - { - d = *dst; - s = src; - - UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); - - *dst = s; - } - - dst++; - } - } -} - -static void -fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca, s; - uint32_t *dst_line, *dst, d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - if (ma == 0xffffffff) - { - if (srca == 0xff) - *dst = src; - else - *dst = over (src, *dst); - } - else if (ma) - { - d = *dst; - s = src; - - UN8x4_MUL_UN8x4 (s, ma); - UN8x4_MUL_UN8 (ma, srca); - ma = ~ma; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - - *dst = d; - } - - dst++; - } - } -} - -static void -fast_composite_over_n_8_0888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint8_t *dst_line, *dst; - uint32_t d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - { - d = src; - } - else - { - d = fetch_24 (dst); - d = over (src, d); - } - store_24 (dst, d); - } - else if (m) - { - d = over (in (src, m), fetch_24 (dst)); - store_24 (dst, d); - } - dst += 3; - } - } -} - -static void -fast_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint16_t *dst_line, *dst; - uint32_t d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - { - d = src; - } - else - { - d = *dst; - d = over (src, convert_0565_to_0888 (d)); - } - *dst = convert_8888_to_0565 (d); - } - else if (m) - { - d = *dst; - d = over (in (src, m), convert_0565_to_0888 (d)); - *dst = convert_8888_to_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca, s; - uint16_t src16; - uint16_t *dst_line, *dst; - uint32_t d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - src16 = convert_8888_to_0565 (src); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - if (ma == 0xffffffff) - { - if (srca == 0xff) - { - *dst = src16; - } - else - { - d = *dst; - d = over (src, convert_0565_to_0888 (d)); - *dst = convert_8888_to_0565 (d); - } - } - else if (ma) - { - d = *dst; - d = convert_0565_to_0888 (d); - - s = src; - - UN8x4_MUL_UN8x4 (s, ma); - UN8x4_MUL_UN8 (ma, srca); - ma = ~ma; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - - *dst = convert_8888_to_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - uint8_t a; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (a == 0xff) - *dst = s; - else if (s) - *dst = over (s, *dst); - dst++; - } - } -} - -static void -fast_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - *dst++ = (*src++) | 0xff000000; - } -} - -#if 0 -static void -fast_composite_over_8888_0888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint32_t d; - uint32_t *src_line, *src, s; - uint8_t a; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (a) - { - if (a == 0xff) - d = s; - else - d = over (s, fetch_24 (dst)); - - store_24 (dst, d); - } - dst += 3; - } - } -} -#endif - -static void -fast_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t d; - uint32_t *src_line, *src, s; - uint8_t a; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (s) - { - if (a == 0xff) - { - d = s; - } - else - { - d = *dst; - d = over (s, convert_0565_to_0888 (d)); - } - *dst = convert_8888_to_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_add_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint8_t s, d; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - if (s) - { - if (s != 0xff) - { - d = *dst; - t = d + s; - s = t | (0 - (t >> 8)); - } - *dst = s; - } - dst++; - } - } -} - -static void -fast_composite_add_0565_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t d; - uint16_t *src_line, *src; - uint32_t s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - if (s) - { - d = *dst; - s = convert_0565_to_8888 (s); - if (d) - { - d = convert_0565_to_8888 (d); - UN8x4_ADD_UN8x4 (s, d); - } - *dst = convert_8888_to_0565 (s); - } - dst++; - } - } -} - -static void -fast_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint32_t s, d; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - if (s) - { - if (s != 0xffffffff) - { - d = *dst; - if (d) - UN8x4_ADD_UN8x4 (s, d); - } - *dst = s; - } - dst++; - } - } -} - -static void -fast_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - uint8_t sa; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - sa = (src >> 24); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - r = ADD_UN8 (m, d, tmp); - - *dst++ = r; - } - } -} - -#ifdef WORDS_BIGENDIAN -#define CREATE_BITMASK(n) (0x80000000 >> (n)) -#define UPDATE_BITMASK(n) ((n) >> 1) -#else -#define CREATE_BITMASK(n) (1U << (n)) -#define UPDATE_BITMASK(n) ((n) << 1) -#endif - -#define TEST_BIT(p, n) \ - (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) -#define SET_BIT(p, n) \ - do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); - -static void -fast_composite_add_1_1 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, - src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, - dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - /* - * TODO: improve performance by processing uint32_t data instead - * of individual bits - */ - if (TEST_BIT (src, src_x + w)) - SET_BIT (dst, dest_x + w); - } - } -} - -static void -fast_composite_over_n_1_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst, *dst_line; - uint32_t *mask, *mask_line; - int mask_stride, dst_stride; - uint32_t bitcache, bitmask; - int32_t w; - - if (width <= 0) - return; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, - dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, - mask_stride, mask_line, 1); - mask_line += mask_x >> 5; - - if (srca == 0xff) - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = src; - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = over (src, *dst); - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } -} - -static void -fast_composite_over_n_1_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint16_t *dst, *dst_line; - uint32_t *mask, *mask_line; - int mask_stride, dst_stride; - uint32_t bitcache, bitmask; - int32_t w; - uint32_t d; - uint16_t src565; - - if (width <= 0) - return; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, - dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, - mask_stride, mask_line, 1); - mask_line += mask_x >> 5; - - if (srca == 0xff) - { - src565 = convert_8888_to_0565 (src); - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = src565; - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - { - d = over (src, convert_0565_to_0888 (*dst)); - *dst = convert_8888_to_0565 (d); - } - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } -} - -/* - * Simple bitblt - */ - -static void -fast_composite_solid_fill (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (dest_image->bits.format == PIXMAN_a1) - { - src = src >> 31; - } - else if (dest_image->bits.format == PIXMAN_a8) - { - src = src >> 24; - } - else if (dest_image->bits.format == PIXMAN_r5g6b5 || - dest_image->bits.format == PIXMAN_b5g6r5) - { - src = convert_8888_to_0565 (src); - } - - pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dest_image->bits.format), - dest_x, dest_y, - width, height, - src); -} - -static void -fast_composite_src_memcpy (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; - uint32_t n_bytes = width * bpp; - int dst_stride, src_stride; - uint8_t *dst; - uint8_t *src; - - src_stride = src_image->bits.rowstride * 4; - dst_stride = dest_image->bits.rowstride * 4; - - src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; - dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; - - while (height--) - { - memcpy (dst, src, n_bytes); - - dst += dst_stride; - src += src_stride; - } -} - -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) -FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) -FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) -FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) -FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) - -#define REPEAT_MIN_WIDTH 32 - -static void -fast_composite_tiled_repeat (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - pixman_composite_func_t func; - pixman_format_code_t mask_format; - uint32_t src_flags, mask_flags; - int32_t sx, sy; - int32_t width_remain; - int32_t num_pixels; - int32_t src_width; - int32_t i, j; - pixman_image_t extended_src_image; - uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; - pixman_bool_t need_src_extension; - uint32_t *src_line; - int32_t src_stride; - int32_t src_bpp; - pixman_composite_info_t info2 = *info; - - src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | - FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - - if (mask_image) - { - mask_format = mask_image->common.extended_format_code; - mask_flags = info->mask_flags; - } - else - { - mask_format = PIXMAN_null; - mask_flags = FAST_PATH_IS_OPAQUE; - } - - _pixman_implementation_lookup_composite ( - imp->toplevel, info->op, - src_image->common.extended_format_code, src_flags, - mask_format, mask_flags, - dest_image->common.extended_format_code, info->dest_flags, - &imp, &func); - - src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - - if (src_image->bits.width < REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && - !src_image->bits.indexed) - { - sx = src_x; - sx = MOD (sx, src_image->bits.width); - sx += width; - src_width = 0; - - while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) - src_width += src_image->bits.width; - - src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); - - /* Initialize/validate stack-allocated temporary image */ - _pixman_bits_image_init (&extended_src_image, src_image->bits.format, - src_width, 1, &extended_src[0], src_stride, - FALSE); - _pixman_image_validate (&extended_src_image); - - info2.src_image = &extended_src_image; - need_src_extension = TRUE; - } - else - { - src_width = src_image->bits.width; - need_src_extension = FALSE; - } - - sx = src_x; - sy = src_y; - - while (--height >= 0) - { - sx = MOD (sx, src_width); - sy = MOD (sy, src_image->bits.height); - - if (need_src_extension) - { - if (src_bpp == 32) - { - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - extended_src[i] = src_line[j]; - } - } - else if (src_bpp == 16) - { - uint16_t *src_line_16; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, - src_line_16, 1); - src_line = (uint32_t*)src_line_16; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; - } - } - else if (src_bpp == 8) - { - uint8_t *src_line_8; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, - src_line_8, 1); - src_line = (uint32_t*)src_line_8; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; - } - } - - info2.src_y = 0; - } - else - { - info2.src_y = sy; - } - - width_remain = width; - - while (width_remain > 0) - { - num_pixels = src_width - sx; - - if (num_pixels > width_remain) - num_pixels = width_remain; - - info2.src_x = sx; - info2.width = num_pixels; - info2.height = 1; - - func (imp, &info2); - - width_remain -= num_pixels; - info2.mask_x += num_pixels; - info2.dest_x += num_pixels; - sx = 0; - } - - sx = src_x; - sy++; - info2.mask_x = info->mask_x; - info2.mask_y++; - info2.dest_x = info->dest_x; - info2.dest_y++; - } - - if (need_src_extension) - _pixman_image_fini (&extended_src_image); -} - -/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ -static force_inline void -scaled_nearest_scanline_565_565_SRC (uint16_t * dst, - const uint16_t * src, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t fully_transparent_src) -{ - uint16_t tmp1, tmp2, tmp3, tmp4; - while ((w -= 4) >= 0) - { - tmp1 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - tmp2 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - tmp3 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - tmp4 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - *dst++ = tmp1; - *dst++ = tmp2; - *dst++ = tmp3; - *dst++ = tmp4; - } - if (w & 2) - { - tmp1 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - tmp2 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - *dst++ = tmp1; - *dst++ = tmp2; - } - if (w & 1) - *dst = *(src + pixman_fixed_to_int (vx)); -} - -FAST_NEAREST_MAINLOOP (565_565_cover_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, COVER) -FAST_NEAREST_MAINLOOP (565_565_none_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, NONE) -FAST_NEAREST_MAINLOOP (565_565_pad_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, PAD) - -static force_inline uint32_t -fetch_nearest (pixman_repeat_t src_repeat, - pixman_format_code_t format, - uint32_t *src, int x, int src_width) -{ - if (repeat (src_repeat, &x, src_width)) - { - if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8) - return *(src + x) | 0xff000000; - else - return *(src + x); - } - else - { - return 0; - } -} - -static force_inline void -combine_over (uint32_t s, uint32_t *dst) -{ - if (s) - { - uint8_t ia = 0xff - (s >> 24); - - if (ia) - UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); - else - *dst = s; - } -} - -static force_inline void -combine_src (uint32_t s, uint32_t *dst) -{ - *dst = s; -} - -static void -fast_composite_scaled_nearest (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line; - uint32_t *src_line; - int dst_stride, src_stride; - int src_width, src_height; - pixman_repeat_t src_repeat; - pixman_fixed_t unit_x, unit_y; - pixman_format_code_t src_format; - pixman_vector_t v; - pixman_fixed_t vy; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be - * transformed from destination space to source space - */ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (src_image->common.transform, &v)) - return; - - unit_x = src_image->common.transform->matrix[0][0]; - unit_y = src_image->common.transform->matrix[1][1]; - - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ - v.vector[0] -= pixman_fixed_e; - v.vector[1] -= pixman_fixed_e; - - src_height = src_image->bits.height; - src_width = src_image->bits.width; - src_repeat = src_image->common.repeat; - src_format = src_image->bits.format; - - vy = v.vector[1]; - while (height--) - { - pixman_fixed_t vx = v.vector[0]; - int y = pixman_fixed_to_int (vy); - uint32_t *dst = dst_line; - - dst_line += dst_stride; - - /* adjust the y location by a unit vector in the y direction - * this is equivalent to transforming y+1 of the destination point to source space */ - vy += unit_y; - - if (!repeat (src_repeat, &y, src_height)) - { - if (op == PIXMAN_OP_SRC) - memset (dst, 0, sizeof (*dst) * width); - } - else - { - int w = width; - - uint32_t *src = src_line + y * src_stride; - - while (w >= 2) - { - uint32_t s1, s2; - int x1, x2; - - x1 = pixman_fixed_to_int (vx); - vx += unit_x; - - x2 = pixman_fixed_to_int (vx); - vx += unit_x; - - w -= 2; - - s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); - s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); - - if (op == PIXMAN_OP_OVER) - { - combine_over (s1, dst++); - combine_over (s2, dst++); - } - else - { - combine_src (s1, dst++); - combine_src (s2, dst++); - } - } - - while (w--) - { - uint32_t s; - int x; - - x = pixman_fixed_to_int (vx); - vx += unit_x; - - s = fetch_nearest (src_repeat, src_format, src, x, src_width); - - if (op == PIXMAN_OP_OVER) - combine_over (s, dst++); - else - combine_src (s, dst++); - } - } - } -} - -#define CACHE_LINE_SIZE 64 - -#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ - \ -static void \ -blt_rotated_90_trivial_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int w, \ - int h) \ -{ \ - int x, y; \ - for (y = 0; y < h; y++) \ - { \ - const pix_type *s = src + (h - y - 1); \ - pix_type *d = dst + dst_stride * y; \ - for (x = 0; x < w; x++) \ - { \ - *d++ = *s; \ - s += src_stride; \ - } \ - } \ -} \ - \ -static void \ -blt_rotated_270_trivial_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int w, \ - int h) \ -{ \ - int x, y; \ - for (y = 0; y < h; y++) \ - { \ - const pix_type *s = src + src_stride * (w - 1) + y; \ - pix_type *d = dst + dst_stride * y; \ - for (x = 0; x < w; x++) \ - { \ - *d++ = *s; \ - s -= src_stride; \ - } \ - } \ -} \ - \ -static void \ -blt_rotated_90_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int W, \ - int H) \ -{ \ - int x; \ - int leading_pixels = 0, trailing_pixels = 0; \ - const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ - \ - /* \ - * split processing into handling destination as TILE_SIZExH cache line \ - * aligned vertical stripes (optimistically assuming that destination \ - * stride is a multiple of cache line, if not - it will be just a bit \ - * slower) \ - */ \ - \ - if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ - { \ - leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (leading_pixels > W) \ - leading_pixels = W; \ - \ - /* unaligned leading part NxH (where N < TILE_SIZE) */ \ - blt_rotated_90_trivial_##suffix ( \ - dst, \ - dst_stride, \ - src, \ - src_stride, \ - leading_pixels, \ - H); \ - \ - dst += leading_pixels; \ - src += leading_pixels * src_stride; \ - W -= leading_pixels; \ - } \ - \ - if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ - { \ - trailing_pixels = (((uintptr_t)(dst + W) & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (trailing_pixels > W) \ - trailing_pixels = W; \ - W -= trailing_pixels; \ - } \ - \ - for (x = 0; x < W; x += TILE_SIZE) \ - { \ - /* aligned middle part TILE_SIZExH */ \ - blt_rotated_90_trivial_##suffix ( \ - dst + x, \ - dst_stride, \ - src + src_stride * x, \ - src_stride, \ - TILE_SIZE, \ - H); \ - } \ - \ - if (trailing_pixels) \ - { \ - /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ - blt_rotated_90_trivial_##suffix ( \ - dst + W, \ - dst_stride, \ - src + W * src_stride, \ - src_stride, \ - trailing_pixels, \ - H); \ - } \ -} \ - \ -static void \ -blt_rotated_270_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int W, \ - int H) \ -{ \ - int x; \ - int leading_pixels = 0, trailing_pixels = 0; \ - const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ - \ - /* \ - * split processing into handling destination as TILE_SIZExH cache line \ - * aligned vertical stripes (optimistically assuming that destination \ - * stride is a multiple of cache line, if not - it will be just a bit \ - * slower) \ - */ \ - \ - if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ - { \ - leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (leading_pixels > W) \ - leading_pixels = W; \ - \ - /* unaligned leading part NxH (where N < TILE_SIZE) */ \ - blt_rotated_270_trivial_##suffix ( \ - dst, \ - dst_stride, \ - src + src_stride * (W - leading_pixels), \ - src_stride, \ - leading_pixels, \ - H); \ - \ - dst += leading_pixels; \ - W -= leading_pixels; \ - } \ - \ - if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ - { \ - trailing_pixels = (((uintptr_t)(dst + W) & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (trailing_pixels > W) \ - trailing_pixels = W; \ - W -= trailing_pixels; \ - src += trailing_pixels * src_stride; \ - } \ - \ - for (x = 0; x < W; x += TILE_SIZE) \ - { \ - /* aligned middle part TILE_SIZExH */ \ - blt_rotated_270_trivial_##suffix ( \ - dst + x, \ - dst_stride, \ - src + src_stride * (W - x - TILE_SIZE), \ - src_stride, \ - TILE_SIZE, \ - H); \ - } \ - \ - if (trailing_pixels) \ - { \ - /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ - blt_rotated_270_trivial_##suffix ( \ - dst + W, \ - dst_stride, \ - src - trailing_pixels * src_stride, \ - src_stride, \ - trailing_pixels, \ - H); \ - } \ -} \ - \ -static void \ -fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - pix_type *dst_line; \ - pix_type *src_line; \ - int dst_stride, src_stride; \ - int src_x_t, src_y_t; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ - dst_stride, dst_line, 1); \ - src_x_t = -src_y + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[0][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ - src_y_t = src_x + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[1][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ - src_stride, src_line, 1); \ - blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ - width, height); \ -} \ - \ -static void \ -fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - pix_type *dst_line; \ - pix_type *src_line; \ - int dst_stride, src_stride; \ - int src_x_t, src_y_t; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ - dst_stride, dst_line, 1); \ - src_x_t = src_y + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[0][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e); \ - src_y_t = -src_x + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[1][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ - src_stride, src_line, 1); \ - blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ - width, height); \ -} - -FAST_SIMPLE_ROTATE (8, uint8_t) -FAST_SIMPLE_ROTATE (565, uint16_t) -FAST_SIMPLE_ROTATE (8888, uint32_t) - -static const pixman_fast_path_t c_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), - PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), - PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), - - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), - - SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), - - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), - -#define NEAREST_FAST_PATH(op,s,d) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest, \ - } - - NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), - NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), - - NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), - NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), - - NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), - NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), - - NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), - NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), - -#define SIMPLE_ROTATE_FLAGS(angle) \ - (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \ - FAST_PATH_STANDARD_FLAGS) - -#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_rotate_90_##suffix, \ - }, \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_rotate_270_##suffix, \ - } - - SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), - SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), - - /* Simple repeat fast path entry. */ - { PIXMAN_OP_any, - PIXMAN_any, - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | - FAST_PATH_NORMAL_REPEAT), - PIXMAN_any, 0, - PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, - fast_composite_tiled_repeat - }, - - { PIXMAN_OP_NONE }, -}; - -#ifdef WORDS_BIGENDIAN -#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) -#else -#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) -#endif - -static force_inline void -pixman_fill1_line (uint32_t *dst, int offs, int width, int v) -{ - if (offs) - { - int leading_pixels = 32 - offs; - if (leading_pixels >= width) - { - if (v) - *dst |= A1_FILL_MASK (width, offs); - else - *dst &= ~A1_FILL_MASK (width, offs); - return; - } - else - { - if (v) - *dst++ |= A1_FILL_MASK (leading_pixels, offs); - else - *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); - width -= leading_pixels; - } - } - while (width >= 32) - { - if (v) - *dst++ = 0xFFFFFFFF; - else - *dst++ = 0; - width -= 32; - } - if (width > 0) - { - if (v) - *dst |= A1_FILL_MASK (width, 0); - else - *dst &= ~A1_FILL_MASK (width, 0); - } -} - -static void -pixman_fill1 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - uint32_t *dst = bits + y * stride + (x >> 5); - int offs = x & 31; - - if (filler & 1) - { - while (height--) - { - pixman_fill1_line (dst, offs, width, 1); - dst += stride; - } - } - else - { - while (height--) - { - pixman_fill1_line (dst, offs, width, 0); - dst += stride; - } - } -} - -static void -pixman_fill8 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - int byte_stride = stride * (int) sizeof (uint32_t); - uint8_t *dst = (uint8_t *) bits; - uint8_t v = filler & 0xff; - int i; - - dst = dst + y * byte_stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - dst[i] = v; - - dst += byte_stride; - } -} - -static void -pixman_fill16 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - int short_stride = - (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); - uint16_t *dst = (uint16_t *)bits; - uint16_t v = filler & 0xffff; - int i; - - dst = dst + y * short_stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - dst[i] = v; - - dst += short_stride; - } -} - -static void -pixman_fill32 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - int i; - - bits = bits + y * stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - bits[i] = filler; - - bits += stride; - } -} - -static pixman_bool_t -fast_path_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - switch (bpp) - { - case 1: - pixman_fill1 (bits, stride, x, y, width, height, filler); - break; - - case 8: - pixman_fill8 (bits, stride, x, y, width, height, filler); - break; - - case 16: - pixman_fill16 (bits, stride, x, y, width, height, filler); - break; - - case 32: - pixman_fill32 (bits, stride, x, y, width, height, filler); - break; - - default: - return FALSE; - } - - return TRUE; -} - -/*****************************************************************************/ - -static uint32_t * -fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) -{ - int32_t w = iter->width; - uint32_t *dst = iter->buffer; - const uint16_t *src = (const uint16_t *)iter->bits; - - iter->bits += iter->stride; - - /* Align the source buffer at 4 bytes boundary */ - if (w > 0 && ((uintptr_t)src & 3)) - { - *dst++ = convert_0565_to_8888 (*src++); - w--; - } - /* Process two pixels per iteration */ - while ((w -= 2) >= 0) - { - uint32_t sr, sb, sg, t0, t1; - uint32_t s = *(const uint32_t *)src; - src += 2; - sr = (s >> 8) & 0x00F800F8; - sb = (s << 3) & 0x00F800F8; - sg = (s >> 3) & 0x00FC00FC; - sr |= sr >> 5; - sb |= sb >> 5; - sg |= sg >> 6; - t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | - (sb & 0xFF) | 0xFF000000; - t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | - (sb >> 16) | 0xFF000000; -#ifdef WORDS_BIGENDIAN - *dst++ = t1; - *dst++ = t0; -#else - *dst++ = t0; - *dst++ = t1; -#endif - } - if (w & 1) - { - *dst = convert_0565_to_8888 (*src); - } - - return iter->buffer; -} - -static uint32_t * -fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) -{ - iter->bits += iter->stride; - return iter->buffer; -} - -/* Helper function for a workaround, which tries to ensure that 0x1F001F - * constant is always allocated in a register on RISC architectures. - */ -static force_inline uint32_t -convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) -{ - uint32_t a, b; - a = (s >> 3) & x1F001F; - b = s & 0xFC00; - a |= a >> 5; - a |= b >> 5; - return a; -} - -static void -fast_write_back_r5g6b5 (pixman_iter_t *iter) -{ - int32_t w = iter->width; - uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); - const uint32_t *src = iter->buffer; - /* Workaround to ensure that x1F001F variable is allocated in a register */ - static volatile uint32_t volatile_x1F001F = 0x1F001F; - uint32_t x1F001F = volatile_x1F001F; - - while ((w -= 4) >= 0) - { - uint32_t s1 = *src++; - uint32_t s2 = *src++; - uint32_t s3 = *src++; - uint32_t s4 = *src++; - *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); - *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); - *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); - *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); - } - if (w & 2) - { - *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); - *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); - } - if (w & 1) - { - *dst = convert_8888_to_0565_workaround (*src, x1F001F); - } -} - -typedef struct -{ - int y; - uint64_t * buffer; -} line_t; - -typedef struct -{ - line_t lines[2]; - pixman_fixed_t y; - pixman_fixed_t x; - uint64_t data[1]; -} bilinear_info_t; - -static void -fetch_horizontal (bits_image_t *image, line_t *line, - int y, pixman_fixed_t x, pixman_fixed_t ux, int n) -{ - uint32_t *bits = image->bits + y * image->rowstride; - int i; - - for (i = 0; i < n; ++i) - { - int x0 = pixman_fixed_to_int (x); - int x1 = x0 + 1; - int32_t dist_x; - - uint32_t left = *(bits + x0); - uint32_t right = *(bits + x1); - - dist_x = pixman_fixed_to_bilinear_weight (x); - dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS); - -#if SIZEOF_LONG <= 4 - { - uint32_t lag, rag, ag; - uint32_t lrb, rrb, rb; - - lag = (left & 0xff00ff00) >> 8; - rag = (right & 0xff00ff00) >> 8; - ag = (lag << 8) + dist_x * (rag - lag); - - lrb = (left & 0x00ff00ff); - rrb = (right & 0x00ff00ff); - rb = (lrb << 8) + dist_x * (rrb - lrb); - - *((uint32_t *)(line->buffer + i)) = ag; - *((uint32_t *)(line->buffer + i) + 1) = rb; - } -#else - { - uint64_t lagrb, ragrb; - uint32_t lag, rag; - uint32_t lrb, rrb; - - lag = (left & 0xff00ff00); - lrb = (left & 0x00ff00ff); - rag = (right & 0xff00ff00); - rrb = (right & 0x00ff00ff); - lagrb = (((uint64_t)lag) << 24) | lrb; - ragrb = (((uint64_t)rag) << 24) | rrb; - - line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb); - } -#endif - - x += ux; - } - - line->y = y; -} - -static uint32_t * -fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) -{ - pixman_fixed_t fx, ux; - bilinear_info_t *info = iter->data; - line_t *line0, *line1; - int y0, y1; - int32_t dist_y; - int i; - - COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8); - - fx = info->x; - ux = iter->image->common.transform->matrix[0][0]; - - y0 = pixman_fixed_to_int (info->y); - y1 = y0 + 1; - dist_y = pixman_fixed_to_bilinear_weight (info->y); - dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS); - - line0 = &info->lines[y0 & 0x01]; - line1 = &info->lines[y1 & 0x01]; - - if (line0->y != y0) - { - fetch_horizontal ( - &iter->image->bits, line0, y0, fx, ux, iter->width); - } - - if (line1->y != y1) - { - fetch_horizontal ( - &iter->image->bits, line1, y1, fx, ux, iter->width); - } - - for (i = 0; i < iter->width; ++i) - { -#if SIZEOF_LONG <= 4 - uint32_t ta, tr, tg, tb; - uint32_t ba, br, bg, bb; - uint32_t tag, trb; - uint32_t bag, brb; - uint32_t a, r, g, b; - - tag = *((uint32_t *)(line0->buffer + i)); - trb = *((uint32_t *)(line0->buffer + i) + 1); - bag = *((uint32_t *)(line1->buffer + i)); - brb = *((uint32_t *)(line1->buffer + i) + 1); - - ta = tag >> 16; - ba = bag >> 16; - a = (ta << 8) + dist_y * (ba - ta); - - tr = trb >> 16; - br = brb >> 16; - r = (tr << 8) + dist_y * (br - tr); - - tg = tag & 0xffff; - bg = bag & 0xffff; - g = (tg << 8) + dist_y * (bg - tg); - - tb = trb & 0xffff; - bb = brb & 0xffff; - b = (tb << 8) + dist_y * (bb - tb); - - a = (a << 8) & 0xff000000; - r = (r << 0) & 0x00ff0000; - g = (g >> 8) & 0x0000ff00; - b = (b >> 16) & 0x000000ff; -#else - uint64_t top = line0->buffer[i]; - uint64_t bot = line1->buffer[i]; - uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16; - uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16; - uint64_t tgb = (top & 0x0000ffff0000ffffULL); - uint64_t bgb = (bot & 0x0000ffff0000ffffULL); - uint64_t ar, gb; - uint32_t a, r, g, b; - - ar = (tar << 8) + dist_y * (bar - tar); - gb = (tgb << 8) + dist_y * (bgb - tgb); - - a = ((ar >> 24) & 0xff000000); - r = ((ar >> 0) & 0x00ff0000); - g = ((gb >> 40) & 0x0000ff00); - b = ((gb >> 16) & 0x000000ff); -#endif - - iter->buffer[i] = a | r | g | b; - } - - info->y += iter->image->common.transform->matrix[1][1]; - - return iter->buffer; -} - -static void -bilinear_cover_iter_fini (pixman_iter_t *iter) -{ - free (iter->data); -} - -static void -fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) -{ - int width = iter->width; - bilinear_info_t *info; - pixman_vector_t v; - - /* Reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (iter->image->common.transform, &v)) - goto fail; - - info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t)); - if (!info) - goto fail; - - info->x = v.vector[0] - pixman_fixed_1 / 2; - info->y = v.vector[1] - pixman_fixed_1 / 2; - - /* It is safe to set the y coordinates to -1 initially - * because COVER_CLIP_BILINEAR ensures that we will only - * be asked to fetch lines in the [0, height) interval - */ - info->lines[0].y = -1; - info->lines[0].buffer = &(info->data[0]); - info->lines[1].y = -1; - info->lines[1].buffer = &(info->data[width]); - - iter->get_scanline = fast_fetch_bilinear_cover; - iter->fini = bilinear_cover_iter_fini; - - iter->data = info; - return; - -fail: - /* Something went wrong, either a bad matrix or OOM; in such cases, - * we don't guarantee any particular rendering. - */ - _pixman_log_error ( - FUNC, "Allocation failure or bad matrix, skipping rendering\n"); - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->fini = NULL; -} - -static uint32_t * -bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, - const uint32_t *mask) -{ - - pixman_image_t * ima = iter->image; - int offset = iter->x; - int line = iter->y++; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - bits_image_t *bits = &ima->bits; - pixman_fixed_t x_top, x_bottom, x; - pixman_fixed_t ux_top, ux_bottom, ux; - pixman_vector_t v; - uint32_t top_mask, bottom_mask; - uint32_t *top_row; - uint32_t *bottom_row; - uint32_t *end; - uint32_t zero[2] = { 0, 0 }; - uint32_t one = 1; - int y, y1, y2; - int disty; - int mask_inc; - int w; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (bits->common.transform, &v)) - return iter->buffer; - - ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; - x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; - - y = v.vector[1] - pixman_fixed_1/2; - disty = pixman_fixed_to_bilinear_weight (y); - - /* Load the pointers to the first and second lines from the source - * image that bilinear code must read. - * - * The main trick in this code is about the check if any line are - * outside of the image; - * - * When I realize that a line (any one) is outside, I change - * the pointer to a dummy area with zeros. Once I change this, I - * must be sure the pointer will not change, so I set the - * variables to each pointer increments inside the loop. - */ - y1 = pixman_fixed_to_int (y); - y2 = y1 + 1; - - if (y1 < 0 || y1 >= bits->height) - { - top_row = zero; - x_top = 0; - ux_top = 0; - } - else - { - top_row = bits->bits + y1 * bits->rowstride; - x_top = x; - ux_top = ux; - } - - if (y2 < 0 || y2 >= bits->height) - { - bottom_row = zero; - x_bottom = 0; - ux_bottom = 0; - } - else - { - bottom_row = bits->bits + y2 * bits->rowstride; - x_bottom = x; - ux_bottom = ux; - } - - /* Instead of checking whether the operation uses the mast in - * each loop iteration, verify this only once and prepare the - * variables to make the code smaller inside the loop. - */ - if (!mask) - { - mask_inc = 0; - mask = &one; - } - else - { - /* If have a mask, prepare the variables to check it */ - mask_inc = 1; - } - - /* If both are zero, then the whole thing is zero */ - if (top_row == zero && bottom_row == zero) - { - memset (buffer, 0, width * sizeof (uint32_t)); - return iter->buffer; - } - else if (bits->format == PIXMAN_x8r8g8b8) - { - if (top_row == zero) - { - top_mask = 0; - bottom_mask = 0xff000000; - } - else if (bottom_row == zero) - { - top_mask = 0xff000000; - bottom_mask = 0; - } - else - { - top_mask = 0xff000000; - bottom_mask = 0xff000000; - } - } - else - { - top_mask = 0; - bottom_mask = 0; - } - - end = buffer + width; - - /* Zero fill to the left of the image */ - while (buffer < end && x < pixman_fixed_minus_1) - { - *buffer++ = 0; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Left edge - */ - while (buffer < end && x < 0) - { - uint32_t tr, br; - int32_t distx; - - tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; - br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); - - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Main part */ - w = pixman_int_to_fixed (bits->width - 1); - - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, tr, bl, br; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Right Edge */ - w = pixman_int_to_fixed (bits->width); - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, bl; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Zero fill to the left of the image */ - while (buffer < end) - *buffer++ = 0; - - return iter->buffer; -} - -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - -static force_inline void -bits_image_fetch_separable_convolution_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - bits_image_t *bits = &image->bits; - pixman_fixed_t *params = image->common.filter_params; - int cwidth = pixman_fixed_to_int (params[0]); - int cheight = pixman_fixed_to_int (params[1]); - int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; - int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; - int x_phase_bits = pixman_fixed_to_int (params[2]); - int y_phase_bits = pixman_fixed_to_int (params[3]); - int x_phase_shift = 16 - x_phase_bits; - int y_phase_shift = 16 - y_phase_bits; - pixman_fixed_t vx, vy; - pixman_fixed_t ux, uy; - pixman_vector_t v; - int k; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - vx = v.vector[0]; - vy = v.vector[1]; - - for (k = 0; k < width; ++k) - { - pixman_fixed_t *y_params; - int satot, srtot, sgtot, sbtot; - pixman_fixed_t x, y; - int32_t x1, x2, y1, y2; - int32_t px, py; - int i, j; - - if (mask && !mask[k]) - goto next; - - /* Round x and y to the middle of the closest phase before continuing. This - * ensures that the convolution matrix is aligned right, since it was - * positioned relative to a particular phase (and not relative to whatever - * exact fraction we happen to get here). - */ - x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); - y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); - - px = (x & 0xffff) >> x_phase_shift; - py = (y & 0xffff) >> y_phase_shift; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - satot = srtot = sgtot = sbtot = 0; - - y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; - - for (i = y1; i < y2; ++i) - { - pixman_fixed_t fy = *y_params++; - - if (fy) - { - pixman_fixed_t *x_params = params + 4 + px * cwidth; - - for (j = x1; j < x2; ++j) - { - pixman_fixed_t fx = *x_params++; - int rx = j; - int ry = i; - - if (fx) - { - pixman_fixed_t f; - uint32_t pixel, mask; - uint8_t *row; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &rx, bits->width); - repeat (repeat_mode, &ry, bits->height); - - row = (uint8_t *)(bits->bits + bits->rowstride * ry); - pixel = convert_pixel (row, rx) | mask; - } - else - { - if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) - { - pixel = 0; - } - else - { - row = (uint8_t *)(bits->bits + bits->rowstride * ry); - pixel = convert_pixel (row, rx) | mask; - } - } - - f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; - srtot += (int)RED_8 (pixel) * f; - sgtot += (int)GREEN_8 (pixel) * f; - sbtot += (int)BLUE_8 (pixel) * f; - satot += (int)ALPHA_8 (pixel) * f; - } - } - } - } - - satot = (satot + 0x8000) >> 16; - srtot = (srtot + 0x8000) >> 16; - sgtot = (sgtot + 0x8000) >> 16; - sbtot = (sbtot + 0x8000) >> 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - -#ifdef WORDS_BIGENDIAN - buffer[k] = (satot << 0) | (srtot << 8) | (sgtot << 16) | (sbtot << 24); -#else - buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); -#endif - - next: - vx += ux; - vy += uy; - } -} - -static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -static force_inline void -bits_image_fetch_bilinear_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - int width = image->bits.width; - int height = image->bits.height; - const uint8_t *row1; - const uint8_t *row2; - - if (mask && !mask[i]) - goto next; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = pixman_fixed_to_bilinear_weight (x1); - disty = pixman_fixed_to_bilinear_weight (y1); - - y1 = pixman_fixed_to_int (y1); - y2 = y1 + 1; - x1 = pixman_fixed_to_int (x1); - x2 = x1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - uint32_t mask; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - repeat (repeat_mode, &x1, width); - repeat (repeat_mode, &y1, height); - repeat (repeat_mode, &x2, width); - repeat (repeat_mode, &y2, height); - - row1 = (uint8_t *)(bits->bits + bits->rowstride * y1); - row2 = (uint8_t *)(bits->bits + bits->rowstride * y2); - - tl = convert_pixel (row1, x1) | mask; - tr = convert_pixel (row1, x2) | mask; - bl = convert_pixel (row2, x1) | mask; - br = convert_pixel (row2, x2) | mask; - } - else - { - uint32_t mask1, mask2; - int bpp; - - /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, - * which means if you use it in expressions, those - * expressions become unsigned themselves. Since - * the variables below can be negative in some cases, - * that will lead to crashes on 64 bit architectures. - * - * So this line makes sure bpp is signed - */ - bpp = PIXMAN_FORMAT_BPP (format); - - if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) - { - buffer[i] = 0; - goto next; - } - - if (y2 == 0) - { - row1 = zero; - mask1 = 0; - } - else - { - row1 = (uint8_t *)(bits->bits + bits->rowstride * y1); - row1 += bpp / 8 * x1; - - mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (y1 == height - 1) - { - row2 = zero; - mask2 = 0; - } - else - { - row2 = (uint8_t *)(bits->bits + bits->rowstride * y2); - row2 += bpp / 8 * x1; - - mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (x2 == 0) - { - tl = 0; - bl = 0; - } - else - { - tl = convert_pixel (row1, 0) | mask1; - bl = convert_pixel (row2, 0) | mask2; - } - - if (x1 == width - 1) - { - tr = 0; - br = 0; - } - else - { - tr = convert_pixel (row1, 1) | mask1; - br = convert_pixel (row2, 1) | mask2; - } - } - - buffer[i] = bilinear_interpolation ( - tl, tr, bl, br, distx, disty); - - next: - x += ux; - y += uy; - } -} - -static force_inline void -bits_image_fetch_nearest_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int width, height, x0, y0; - const uint8_t *row; - - if (mask && !mask[i]) - goto next; - - width = image->bits.width; - height = image->bits.height; - x0 = pixman_fixed_to_int (x - pixman_fixed_e); - y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (repeat_mode == PIXMAN_REPEAT_NONE && - (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) - { - buffer[i] = 0; - } - else - { - uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &x0, width); - repeat (repeat_mode, &y0, height); - } - - row = (uint8_t *)(bits->bits + bits->rowstride * y0); - - buffer[i] = convert_pixel (row, x0) | mask; - } - - next: - x += ux; - y += uy; - } -} - -static force_inline uint32_t -convert_a8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_x8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_a8 (const uint8_t *row, int x) -{ - return (uint32_t) *(row + x) << 24; -} - -static force_inline uint32_t -convert_r5g6b5 (const uint8_t *row, int x) -{ - return convert_0565_to_0888 (*((uint16_t *)row + x)); -} - -#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_separable_convolution_affine ( \ - iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - \ - return iter->buffer; \ - } - -#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_bilinear_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - } - -#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_nearest_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - } - -#define MAKE_FETCHERS(name, format, repeat_mode) \ - MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ - MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ - MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) - -MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) - -#define IMAGE_FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - -static const pixman_iter_info_t fast_iters[] = -{ - { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, - _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, - - { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, - ITER_NARROW | ITER_DEST, - _pixman_iter_init_bits_stride, - fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, - - { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, - ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, - _pixman_iter_init_bits_stride, - fast_dest_fetch_noop, fast_write_back_r5g6b5 }, - - { PIXMAN_a8r8g8b8, - (FAST_PATH_STANDARD_FLAGS | - FAST_PATH_SCALE_TRANSFORM | - FAST_PATH_BILINEAR_FILTER | - FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), - ITER_NARROW | ITER_SRC, - fast_bilinear_cover_iter_init, - NULL, NULL - }, - -#define FAST_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_X_UNIT_POSITIVE | \ - FAST_PATH_Y_UNIT_ZERO | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_BILINEAR_FILTER) - - { PIXMAN_a8r8g8b8, - FAST_BILINEAR_FLAGS, - ITER_NARROW | ITER_SRC, - NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL - }, - - { PIXMAN_x8r8g8b8, - FAST_BILINEAR_FLAGS, - ITER_NARROW | ITER_SRC, - NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL - }, - -#define GENERAL_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_BILINEAR_FILTER) - -#define GENERAL_NEAREST_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_NEAREST_FILTER) - -#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) - -#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - ITER_NARROW | ITER_SRC, \ - NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \ - }, - -#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - ITER_NARROW | ITER_SRC, \ - NULL, bits_image_fetch_bilinear_affine_ ## name, NULL, \ - }, - -#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - ITER_NARROW | ITER_SRC, \ - NULL, bits_image_fetch_nearest_affine_ ## name, NULL \ - }, - -#define AFFINE_FAST_PATHS(name, format, repeat) \ - NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ - BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) - - AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_a8, a8, PAD) - AFFINE_FAST_PATHS (none_a8, a8, NONE) - AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) - AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) - AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) - AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) - AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) - AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) - - { PIXMAN_null }, -}; - -pixman_implementation_t * -_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); - - imp->fill = fast_path_fill; - imp->iter_info = fast_iters; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-filter.c b/vendor/pixman/pixman/pixman-filter.c deleted file mode 100644 index 33327df83..000000000 --- a/vendor/pixman/pixman/pixman-filter.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright 2012, Red Hat, Inc. - * Copyright 2012, Soren Sandmann - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Soren Sandmann - */ -#include -#include -#include -#include -#include -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" - -typedef double (* kernel_func_t) (double x); - -typedef struct -{ - pixman_kernel_t kernel; - kernel_func_t func; - double width; -} filter_info_t; - -static double -impulse_kernel (double x) -{ - return (x == 0.0)? 1.0 : 0.0; -} - -static double -box_kernel (double x) -{ - return 1; -} - -static double -linear_kernel (double x) -{ - return 1 - fabs (x); -} - -static double -gaussian_kernel (double x) -{ -#define SQRT2 (1.4142135623730950488016887242096980785696718753769480) -#define SIGMA (SQRT2 / 2.0) - - return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI)); -} - -static double -sinc (double x) -{ - if (x == 0.0) - return 1.0; - else - return sin (M_PI * x) / (M_PI * x); -} - -static double -lanczos (double x, int n) -{ - return sinc (x) * sinc (x * (1.0 / n)); -} - -static double -lanczos2_kernel (double x) -{ - return lanczos (x, 2); -} - -static double -lanczos3_kernel (double x) -{ - return lanczos (x, 3); -} - -static double -nice_kernel (double x) -{ - return lanczos3_kernel (x * 0.75); -} - -static double -general_cubic (double x, double B, double C) -{ - double ax = fabs(x); - - if (ax < 1) - { - return (((12 - 9 * B - 6 * C) * ax + - (-18 + 12 * B + 6 * C)) * ax * ax + - (6 - 2 * B)) / 6; - } - else if (ax < 2) - { - return ((((-B - 6 * C) * ax + - (6 * B + 30 * C)) * ax + - (-12 * B - 48 * C)) * ax + - (8 * B + 24 * C)) / 6; - } - else - { - return 0; - } -} - -static double -cubic_kernel (double x) -{ - /* This is the Mitchell-Netravali filter. - * - * (0.0, 0.5) would give us the Catmull-Rom spline, - * but that one seems to be indistinguishable from Lanczos2. - */ - return general_cubic (x, 1/3.0, 1/3.0); -} - -static const filter_info_t filters[] = -{ - { PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 }, - { PIXMAN_KERNEL_BOX, box_kernel, 1.0 }, - { PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 }, - { PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 }, - { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 5.0 }, - { PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 }, - { PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 }, - { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 }, -}; - -/* This function scales @kernel2 by @scale, then - * aligns @x1 in @kernel1 with @x2 in @kernel2 and - * and integrates the product of the kernels across @width. - * - * This function assumes that the intervals are within - * the kernels in question. E.g., the caller must not - * try to integrate a linear kernel ouside of [-1:1] - */ -static double -integral (pixman_kernel_t kernel1, double x1, - pixman_kernel_t kernel2, double scale, double x2, - double width) -{ - if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX) - { - return width; - } - /* The LINEAR filter is not differentiable at 0, so if the - * integration interval crosses zero, break it into two - * separate integrals. - */ - else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0) - { - return - integral (kernel1, x1, kernel2, scale, x2, - x1) + - integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); - } - else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0) - { - return - integral (kernel1, x1, kernel2, scale, x2, - x2) + - integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2); - } - else if (kernel1 == PIXMAN_KERNEL_IMPULSE) - { - assert (width == 0.0); - return filters[kernel2].func (x2 * scale); - } - else if (kernel2 == PIXMAN_KERNEL_IMPULSE) - { - assert (width == 0.0); - return filters[kernel1].func (x1); - } - else - { - /* Integration via Simpson's rule - * See http://www.intmath.com/integration/6-simpsons-rule.php - * 12 segments (6 cubic approximations) seems to produce best - * result for lanczos3.linear, which was the combination that - * showed the most errors. This makes sense as the lanczos3 - * filter is 6 wide. - */ -#define N_SEGMENTS 12 -#define SAMPLE(a1, a2) \ - (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) - - double s = 0.0; - double h = width / N_SEGMENTS; - int i; - - s = SAMPLE (x1, x2); - - for (i = 1; i < N_SEGMENTS; i += 2) - { - double a1 = x1 + h * i; - double a2 = x2 + h * i; - s += 4 * SAMPLE (a1, a2); - } - - for (i = 2; i < N_SEGMENTS; i += 2) - { - double a1 = x1 + h * i; - double a2 = x2 + h * i; - s += 2 * SAMPLE (a1, a2); - } - - s += SAMPLE (x1 + width, x2 + width); - - return h * s * (1.0 / 3.0); - } -} - -static void -create_1d_filter (int width, - pixman_kernel_t reconstruct, - pixman_kernel_t sample, - double scale, - int n_phases, - pixman_fixed_t *pstart, - pixman_fixed_t *pend - ) -{ - pixman_fixed_t *p = pstart; - double step; - int i; - if(width <= 0) return; - step = 1.0 / n_phases; - - for (i = 0; i < n_phases; ++i) - { - double frac = step / 2.0 + i * step; - pixman_fixed_t new_total; - int x, x1, x2; - double total, e; - - /* Sample convolution of reconstruction and sampling - * filter. See rounding.txt regarding the rounding - * and sample positions. - */ - - x1 = ceil (frac - width / 2.0 - 0.5); - x2 = x1 + width; - assert( p >= pstart && p + (x2 - x1) <= pend ); /* assert validity of the following loop */ - total = 0; - for (x = x1; x < x2; ++x) - { - double pos = x + 0.5 - frac; - double rlow = - filters[reconstruct].width / 2.0; - double rhigh = rlow + filters[reconstruct].width; - double slow = pos - scale * filters[sample].width / 2.0; - double shigh = slow + scale * filters[sample].width; - double c = 0.0; - double ilow, ihigh; - - if (rhigh >= slow && rlow <= shigh) - { - ilow = MAX (slow, rlow); - ihigh = MIN (shigh, rhigh); - - c = integral (reconstruct, ilow, - sample, 1.0 / scale, ilow - pos, - ihigh - ilow); - } - - *p = (pixman_fixed_t)floor (c * 65536.0 + 0.5); - total += *p; - p++; - } - - /* Normalize, with error diffusion */ - p -= width; - assert(p >= pstart && p + (x2 - x1) <= pend); /* assert validity of the following loop */ - - total = 65536.0 / total; - new_total = 0; - e = 0.0; - for (x = x1; x < x2; ++x) - { - double v = (*p) * total + e; - pixman_fixed_t t = floor (v + 0.5); - - e = v - t; - new_total += t; - *p++ = t; - } - - /* pixman_fixed_e's worth of error may remain; put it - * at the first sample, since that is the only one that - * hasn't had any error diffused into it. - */ - - assert(p - width >= pstart && p - width < pend); /* assert... */ - *(p - width) += pixman_fixed_1 - new_total; - } -} - - -static int -filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size) -{ - return ceil (filters[reconstruct].width + size * filters[sample].width); -} - -#ifdef PIXMAN_GNUPLOT - -/* If enable-gnuplot is configured, then you can pipe the output of a - * pixman-using program to gnuplot and get a continuously-updated plot - * of the horizontal filter. This works well with demos/scale to test - * the filter generation. - * - * The plot is all the different subposition filters shuffled - * together. This is misleading in a few cases: - * - * IMPULSE.BOX - goes up and down as the subfilters have different - * numbers of non-zero samples - * IMPULSE.TRIANGLE - somewhat crooked for the same reason - * 1-wide filters - looks triangular, but a 1-wide box would be more - * accurate - */ -static void -gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p) -{ - double step; - int i, j; - int first; - - step = 1.0 / n_phases; - - printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n"); - printf ("plot [x=%g:%g] '-' with linespoints ls 1\n", -width*0.5, width*0.5); - /* Print a point at the origin so that y==0 line is included: */ - printf ("0 0\n\n"); - - /* The position of the first sample of the phase corresponding to - * frac is given by: - * - * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac - * - * We have to find the frac that minimizes this expression. - * - * For odd widths, we have - * - * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac - * = ceil (frac) + K - frac - * = 1 + K - frac - * - * for some K, so this is minimized when frac is maximized and - * strictly growing with frac. So for odd widths, we can simply - * start at the last phase and go backwards. - * - * For even widths, we have - * - * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac - * = ceil (frac - 0.5) + K - frac - * - * The graph for this function (ignoring K) looks like this: - * - * 0.5 - * | |\ - * | | \ - * | | \ - * 0 | | \ - * |\ | - * | \ | - * | \ | - * -0.5 | \| - * --------------------------------- - * 0 0.5 1 - * - * So in this case we need to start with the phase whose frac is - * less than, but as close as possible to 0.5, then go backwards - * until we hit the first phase, then wrap around to the last - * phase and continue backwards. - * - * Which phase is as close as possible 0.5? The locations of the - * sampling point corresponding to the kth phase is given by - * 1/(2 * n_phases) + k / n_phases: - * - * 1/(2 * n_phases) + k / n_phases = 0.5 - * - * from which it follows that - * - * k = (n_phases - 1) / 2 - * - * rounded down is the phase in question. - */ - if (width & 1) - first = n_phases - 1; - else - first = (n_phases - 1) / 2; - - for (j = 0; j < width; ++j) - { - for (i = 0; i < n_phases; ++i) - { - int phase = first - i; - double frac, pos; - - if (phase < 0) - phase = n_phases + phase; - - frac = step / 2.0 + phase * step; - pos = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + j; - - printf ("%g %g\n", - pos, - pixman_fixed_to_double (*(p + phase * width + j))); - } - } - - printf ("e\n"); - fflush (stdout); -} - -#endif - -/* Create the parameter list for a SEPARABLE_CONVOLUTION filter - * with the given kernels and scale parameters - */ -PIXMAN_EXPORT pixman_fixed_t * -pixman_filter_create_separable_convolution (int *n_values, - pixman_fixed_t scale_x, - pixman_fixed_t scale_y, - pixman_kernel_t reconstruct_x, - pixman_kernel_t reconstruct_y, - pixman_kernel_t sample_x, - pixman_kernel_t sample_y, - int subsample_bits_x, - int subsample_bits_y) -{ - double sx = fabs (pixman_fixed_to_double (scale_x)); - double sy = fabs (pixman_fixed_to_double (scale_y)); - pixman_fixed_t *params; - int subsample_x, subsample_y; - int width, height; - - width = filter_width (reconstruct_x, sample_x, sx); - subsample_x = (1 << subsample_bits_x); - - height = filter_width (reconstruct_y, sample_y, sy); - subsample_y = (1 << subsample_bits_y); - - *n_values = 4 + width * subsample_x + height * subsample_y; - - params = malloc (*n_values * sizeof (pixman_fixed_t)); - if (!params) - return NULL; - - params[0] = pixman_int_to_fixed (width); - params[1] = pixman_int_to_fixed (height); - params[2] = pixman_int_to_fixed (subsample_bits_x); - params[3] = pixman_int_to_fixed (subsample_bits_y); - - { - pixman_fixed_t - *xparams = params+4, - *yparams = xparams + width*subsample_x, - *endparams = params + *n_values; - create_1d_filter(width, reconstruct_x, sample_x, sx, subsample_x, - xparams, yparams); - create_1d_filter(height, reconstruct_y, sample_y, sy, subsample_y, - yparams, endparams); - } - -#ifdef PIXMAN_GNUPLOT - gnuplot_filter(width, subsample_x, params + 4); -#endif - - return params; -} diff --git a/vendor/pixman/pixman/pixman-general.c b/vendor/pixman/pixman/pixman-general.c deleted file mode 100644 index b4450cbec..000000000 --- a/vendor/pixman/pixman/pixman-general.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright © 2009 Red Hat, Inc. - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * 2008 Aaron Plattner, NVIDIA Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include "pixman-private.h" - -static void -general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ - pixman_image_t *image = iter->image; - - switch (image->type) - { - case BITS: - if ((iter->iter_flags & ITER_SRC) == ITER_SRC) - _pixman_bits_image_src_iter_init (image, iter); - else - _pixman_bits_image_dest_iter_init (image, iter); - break; - - case LINEAR: - _pixman_linear_gradient_iter_init (image, iter); - break; - - case RADIAL: - _pixman_radial_gradient_iter_init (image, iter); - break; - - case CONICAL: - _pixman_conical_gradient_iter_init (image, iter); - break; - - case SOLID: - _pixman_log_error (FUNC, "Solid image not handled by noop"); - break; - - default: - _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); - break; - } -} - -static const pixman_iter_info_t general_iters[] = -{ - { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, - { PIXMAN_null }, -}; - -typedef struct op_info_t op_info_t; -struct op_info_t -{ - uint8_t src, dst; -}; - -#define ITER_IGNORE_BOTH \ - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA) - -static const op_info_t op_flags[PIXMAN_N_OPERATORS] = -{ - /* Src Dst */ - { ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */ - { ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */ - { ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */ - { 0, ITER_LOCALIZED_ALPHA }, /* OVER */ - { ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */ - { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */ - { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */ - { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */ - { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */ - { 0, 0 }, /* ATOP */ - { 0, 0 }, /* ATOP_REVERSE */ - { 0, 0 }, /* XOR */ - { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */ - { 0, 0 }, /* SATURATE */ -}; - -#define SCANLINE_BUFFER_LENGTH 8192 - -static pixman_bool_t -operator_needs_division (pixman_op_t op) -{ - static const uint8_t needs_division[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */ - 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */ - }; - - return needs_division[op]; -} - -static void -general_composite_rect (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH]; - uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; - uint8_t *src_buffer, *mask_buffer, *dest_buffer; - pixman_iter_t src_iter, mask_iter, dest_iter; - pixman_combine_32_func_t compose; - pixman_bool_t component_alpha; - iter_flags_t width_flag, src_iter_flags; - int Bpp; - int i; - - if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && - (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && - (dest_image->common.flags & FAST_PATH_NARROW_FORMAT) && - !(operator_needs_division (op)) && - (dest_image->bits.dither == PIXMAN_DITHER_NONE)) - { - width_flag = ITER_NARROW; - Bpp = 4; - } - else - { - width_flag = ITER_WIDE; - Bpp = 16; - } - -#define ALIGN(addr) \ - ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15))) - - if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3)) - return; - - if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3) - { - scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3); - - if (!scanline_buffer) - return; - - memset (scanline_buffer, 0, width * Bpp * 3 + 15 * 3); - } - else - { - memset (stack_scanline_buffer, 0, sizeof (stack_scanline_buffer)); - } - - src_buffer = ALIGN (scanline_buffer); - mask_buffer = ALIGN (src_buffer + width * Bpp); - dest_buffer = ALIGN (mask_buffer + width * Bpp); - - if (width_flag == ITER_WIDE) - { - /* To make sure there aren't any NANs in the buffers */ - memset (src_buffer, 0, width * Bpp); - memset (mask_buffer, 0, width * Bpp); - memset (dest_buffer, 0, width * Bpp); - } - - /* src iter */ - src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; - - _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, - info->src_flags); - - /* mask iter */ - if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - /* If it doesn't matter what the source is, then it doesn't matter - * what the mask is - */ - mask_image = NULL; - } - - component_alpha = mask_image && mask_image->common.component_alpha; - - _pixman_implementation_iter_init ( - imp->toplevel, &mask_iter, - mask_image, mask_x, mask_y, width, height, mask_buffer, - ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), - info->mask_flags); - - /* dest iter */ - _pixman_implementation_iter_init ( - imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, - dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); - - compose = _pixman_implementation_lookup_combiner ( - imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); - - for (i = 0; i < height; ++i) - { - uint32_t *s, *m, *d; - - m = mask_iter.get_scanline (&mask_iter, NULL); - s = src_iter.get_scanline (&src_iter, m); - d = dest_iter.get_scanline (&dest_iter, NULL); - - compose (imp->toplevel, op, d, s, m, width); - - dest_iter.write_back (&dest_iter); - } - - if (src_iter.fini) - src_iter.fini (&src_iter); - if (mask_iter.fini) - mask_iter.fini (&mask_iter); - if (dest_iter.fini) - dest_iter.fini (&dest_iter); - - if (scanline_buffer != (uint8_t *) stack_scanline_buffer) - free (scanline_buffer); -} - -static const pixman_fast_path_t general_fast_path[] = -{ - { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect }, - { PIXMAN_OP_NONE } -}; - -pixman_implementation_t * -_pixman_implementation_create_general (void) -{ - pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path); - - _pixman_setup_combiner_functions_32 (imp); - _pixman_setup_combiner_functions_float (imp); - - imp->iter_info = general_iters; - - return imp; -} - diff --git a/vendor/pixman/pixman/pixman-glyph.c b/vendor/pixman/pixman/pixman-glyph.c deleted file mode 100644 index dc9041180..000000000 --- a/vendor/pixman/pixman/pixman-glyph.c +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Copyright 2010, 2012, Soren Sandmann - * Copyright 2010, 2011, 2012, Red Hat, Inc - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Soren Sandmann - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" - -#include - -typedef struct glyph_metrics_t glyph_metrics_t; -typedef struct glyph_t glyph_t; - -#define TOMBSTONE ((glyph_t *)0x1) - -/* XXX: These numbers are arbitrary---we've never done any measurements. - */ -#define N_GLYPHS_HIGH_WATER (16384) -#define N_GLYPHS_LOW_WATER (8192) -#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER) -#define HASH_MASK (HASH_SIZE - 1) - -struct glyph_t -{ - void * font_key; - void * glyph_key; - int origin_x; - int origin_y; - pixman_image_t * image; - pixman_link_t mru_link; -}; - -struct pixman_glyph_cache_t -{ - int n_glyphs; - int n_tombstones; - int freeze_count; - pixman_list_t mru; - glyph_t * glyphs[HASH_SIZE]; -}; - -static void -free_glyph (glyph_t *glyph) -{ - pixman_list_unlink (&glyph->mru_link); - pixman_image_unref (glyph->image); - free (glyph); -} - -static unsigned int -hash (const void *font_key, const void *glyph_key) -{ - size_t key = (size_t)font_key + (size_t)glyph_key; - - /* This hash function is based on one found on Thomas Wang's - * web page at - * - * http://www.concentric.net/~Ttwang/tech/inthash.htm - * - */ - key = (key << 15) - key - 1; - key = key ^ (key >> 12); - key = key + (key << 2); - key = key ^ (key >> 4); - key = key + (key << 3) + (key << 11); - key = key ^ (key >> 16); - - return key; -} - -static glyph_t * -lookup_glyph (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key) -{ - unsigned idx; - glyph_t *g; - - idx = hash (font_key, glyph_key); - while ((g = cache->glyphs[idx++ & HASH_MASK])) - { - if (g != TOMBSTONE && - g->font_key == font_key && - g->glyph_key == glyph_key) - { - return g; - } - } - - return NULL; -} - -static void -insert_glyph (pixman_glyph_cache_t *cache, - glyph_t *glyph) -{ - unsigned idx; - glyph_t **loc; - - idx = hash (glyph->font_key, glyph->glyph_key); - - /* Note: we assume that there is room in the table. If there isn't, - * this will be an infinite loop. - */ - do - { - loc = &cache->glyphs[idx++ & HASH_MASK]; - } while (*loc && *loc != TOMBSTONE); - - if (*loc == TOMBSTONE) - cache->n_tombstones--; - cache->n_glyphs++; - - *loc = glyph; -} - -static void -remove_glyph (pixman_glyph_cache_t *cache, - glyph_t *glyph) -{ - unsigned idx; - - idx = hash (glyph->font_key, glyph->glyph_key); - while (cache->glyphs[idx & HASH_MASK] != glyph) - idx++; - - cache->glyphs[idx & HASH_MASK] = TOMBSTONE; - cache->n_tombstones++; - cache->n_glyphs--; - - /* Eliminate tombstones if possible */ - if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL) - { - while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE) - { - cache->glyphs[idx & HASH_MASK] = NULL; - cache->n_tombstones--; - idx--; - } - } -} - -static void -clear_table (pixman_glyph_cache_t *cache) -{ - int i; - - for (i = 0; i < HASH_SIZE; ++i) - { - glyph_t *glyph = cache->glyphs[i]; - - if (glyph && glyph != TOMBSTONE) - free_glyph (glyph); - - cache->glyphs[i] = NULL; - } - - cache->n_glyphs = 0; - cache->n_tombstones = 0; -} - -PIXMAN_EXPORT pixman_glyph_cache_t * -pixman_glyph_cache_create (void) -{ - pixman_glyph_cache_t *cache; - - if (!(cache = malloc (sizeof *cache))) - return NULL; - - memset (cache->glyphs, 0, sizeof (cache->glyphs)); - cache->n_glyphs = 0; - cache->n_tombstones = 0; - cache->freeze_count = 0; - - pixman_list_init (&cache->mru); - - return cache; -} - -PIXMAN_EXPORT void -pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache) -{ - return_if_fail (cache->freeze_count == 0); - - clear_table (cache); - - free (cache); -} - -PIXMAN_EXPORT void -pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache) -{ - cache->freeze_count++; -} - -PIXMAN_EXPORT void -pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache) -{ - if (--cache->freeze_count == 0 && - cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER) - { - if (cache->n_tombstones > N_GLYPHS_HIGH_WATER) - { - /* More than half the entries are - * tombstones. Just dump the whole table. - */ - clear_table (cache); - } - - while (cache->n_glyphs > N_GLYPHS_LOW_WATER) - { - glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail); - - remove_glyph (cache, glyph); - free_glyph (glyph); - } - } -} - -PIXMAN_EXPORT const void * -pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key) -{ - return lookup_glyph (cache, font_key, glyph_key); -} - -PIXMAN_EXPORT const void * -pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key, - int origin_x, - int origin_y, - pixman_image_t *image) -{ - glyph_t *glyph; - int32_t width, height; - - return_val_if_fail (cache->freeze_count > 0, NULL); - return_val_if_fail (image->type == BITS, NULL); - - width = image->bits.width; - height = image->bits.height; - - if (cache->n_glyphs >= HASH_SIZE) - return NULL; - - if (!(glyph = malloc (sizeof *glyph))) - return NULL; - - glyph->font_key = font_key; - glyph->glyph_key = glyph_key; - glyph->origin_x = origin_x; - glyph->origin_y = origin_y; - - if (!(glyph->image = pixman_image_create_bits ( - image->bits.format, width, height, NULL, -1))) - { - free (glyph); - return NULL; - } - - pixman_image_composite32 (PIXMAN_OP_SRC, - image, NULL, glyph->image, 0, 0, 0, 0, 0, 0, - width, height); - - if (PIXMAN_FORMAT_A (glyph->image->bits.format) != 0 && - PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0) - { - pixman_image_set_component_alpha (glyph->image, TRUE); - } - - pixman_list_prepend (&cache->mru, &glyph->mru_link); - - _pixman_image_validate (glyph->image); - insert_glyph (cache, glyph); - - return glyph; -} - -PIXMAN_EXPORT void -pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key) -{ - glyph_t *glyph; - - if ((glyph = lookup_glyph (cache, font_key, glyph_key))) - { - remove_glyph (cache, glyph); - - free_glyph (glyph); - } -} - -PIXMAN_EXPORT void -pixman_glyph_get_extents (pixman_glyph_cache_t *cache, - int n_glyphs, - pixman_glyph_t *glyphs, - pixman_box32_t *extents) -{ - int i; - - extents->x1 = extents->y1 = INT32_MAX; - extents->x2 = extents->y2 = INT32_MIN; - - for (i = 0; i < n_glyphs; ++i) - { - glyph_t *glyph = (glyph_t *)glyphs[i].glyph; - int x1, y1, x2, y2; - - x1 = glyphs[i].x - glyph->origin_x; - y1 = glyphs[i].y - glyph->origin_y; - x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width; - y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height; - - if (x1 < extents->x1) - extents->x1 = x1; - if (y1 < extents->y1) - extents->y1 = y1; - if (x2 > extents->x2) - extents->x2 = x2; - if (y2 > extents->y2) - extents->y2 = y2; - } -} - -/* This function returns a format that is suitable for use as a mask for the - * set of glyphs in question. - */ -PIXMAN_EXPORT pixman_format_code_t -pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs) -{ - pixman_format_code_t format = PIXMAN_a1; - int i; - - for (i = 0; i < n_glyphs; ++i) - { - const glyph_t *glyph = glyphs[i].glyph; - pixman_format_code_t glyph_format = glyph->image->bits.format; - - if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A) - { - if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format)) - format = glyph_format; - } - else - { - return PIXMAN_a8r8g8b8; - } - } - - return format; -} - -static pixman_bool_t -box32_intersect (pixman_box32_t *dest, - const pixman_box32_t *box1, - const pixman_box32_t *box2) -{ - dest->x1 = MAX (box1->x1, box2->x1); - dest->y1 = MAX (box1->y1, box2->y1); - dest->x2 = MIN (box1->x2, box2->x2); - dest->y2 = MIN (box1->y2, box2->y2); - - return dest->x2 > dest->x1 && dest->y2 > dest->y1; -} - -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) -__attribute__((__force_align_arg_pointer__)) -#endif -PIXMAN_EXPORT void -pixman_composite_glyphs_no_mask (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t dest_x, - int32_t dest_y, - pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs) -{ - pixman_region32_t region; - pixman_format_code_t glyph_format = PIXMAN_null; - uint32_t glyph_flags = 0; - pixman_format_code_t dest_format; - uint32_t dest_flags; - pixman_composite_func_t func = NULL; - pixman_implementation_t *implementation = NULL; - pixman_composite_info_t info; - int i; - - _pixman_image_validate (src); - _pixman_image_validate (dest); - - dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; - - pixman_region32_init (®ion); - if (!_pixman_compute_composite_region32 ( - ®ion, - src, NULL, dest, - src_x - dest_x, src_y - dest_y, 0, 0, 0, 0, - dest->bits.width, dest->bits.height)) - { - goto out; - } - - info.op = op; - info.src_image = src; - info.dest_image = dest; - info.src_flags = src->common.flags; - info.dest_flags = dest->common.flags; - - for (i = 0; i < n_glyphs; ++i) - { - glyph_t *glyph = (glyph_t *)glyphs[i].glyph; - pixman_image_t *glyph_img = glyph->image; - pixman_box32_t glyph_box; - pixman_box32_t *pbox; - uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - pixman_box32_t composite_box; - int n; - - glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x; - glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y; - glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; - glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; - - pbox = pixman_region32_rectangles (®ion, &n); - - info.mask_image = glyph_img; - - while (n--) - { - if (box32_intersect (&composite_box, pbox, &glyph_box)) - { - if (glyph_img->common.extended_format_code != glyph_format || - glyph_img->common.flags != glyph_flags) - { - glyph_format = glyph_img->common.extended_format_code; - glyph_flags = glyph_img->common.flags; - - _pixman_implementation_lookup_composite ( - get_implementation(), op, - src->common.extended_format_code, src->common.flags, - glyph_format, glyph_flags | extra, - dest_format, dest_flags, - &implementation, &func); - } - - info.src_x = src_x + composite_box.x1 - dest_x; - info.src_y = src_y + composite_box.y1 - dest_y; - info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x); - info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y); - info.dest_x = composite_box.x1; - info.dest_y = composite_box.y1; - info.width = composite_box.x2 - composite_box.x1; - info.height = composite_box.y2 - composite_box.y1; - - info.mask_flags = glyph_flags; - - func (implementation, &info); - } - - pbox++; - } - pixman_list_move_to_front (&cache->mru, &glyph->mru_link); - } - -out: - pixman_region32_fini (®ion); -} - -static void -add_glyphs (pixman_glyph_cache_t *cache, - pixman_image_t *dest, - int off_x, int off_y, - int n_glyphs, const pixman_glyph_t *glyphs) -{ - pixman_format_code_t glyph_format = PIXMAN_null; - uint32_t glyph_flags = 0; - pixman_composite_func_t func = NULL; - pixman_implementation_t *implementation = NULL; - pixman_format_code_t dest_format; - uint32_t dest_flags; - pixman_box32_t dest_box; - pixman_composite_info_t info; - pixman_image_t *white_img = NULL; - pixman_bool_t white_src = FALSE; - int i; - - _pixman_image_validate (dest); - - dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; - - info.op = PIXMAN_OP_ADD; - info.dest_image = dest; - info.src_x = 0; - info.src_y = 0; - info.dest_flags = dest_flags; - - dest_box.x1 = 0; - dest_box.y1 = 0; - dest_box.x2 = dest->bits.width; - dest_box.y2 = dest->bits.height; - - for (i = 0; i < n_glyphs; ++i) - { - glyph_t *glyph = (glyph_t *)glyphs[i].glyph; - pixman_image_t *glyph_img = glyph->image; - pixman_box32_t glyph_box; - pixman_box32_t composite_box; - - if (glyph_img->common.extended_format_code != glyph_format || - glyph_img->common.flags != glyph_flags) - { - pixman_format_code_t src_format, mask_format; - - glyph_format = glyph_img->common.extended_format_code; - glyph_flags = glyph_img->common.flags; - - if (glyph_format == dest->bits.format) - { - src_format = glyph_format; - mask_format = PIXMAN_null; - info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - info.mask_flags = FAST_PATH_IS_OPAQUE; - info.mask_image = NULL; - white_src = FALSE; - } - else - { - if (!white_img) - { - static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; - - if (!(white_img = pixman_image_create_solid_fill (&white))) - goto out; - - _pixman_image_validate (white_img); - } - - src_format = PIXMAN_solid; - mask_format = glyph_format; - info.src_flags = white_img->common.flags; - info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - info.src_image = white_img; - white_src = TRUE; - } - - _pixman_implementation_lookup_composite ( - get_implementation(), PIXMAN_OP_ADD, - src_format, info.src_flags, - mask_format, info.mask_flags, - dest_format, dest_flags, - &implementation, &func); - } - - glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; - glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y; - glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; - glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; - - if (box32_intersect (&composite_box, &glyph_box, &dest_box)) - { - int src_x = composite_box.x1 - glyph_box.x1; - int src_y = composite_box.y1 - glyph_box.y1; - - if (white_src) - info.mask_image = glyph_img; - else - info.src_image = glyph_img; - - info.mask_x = info.src_x = src_x; - info.mask_y = info.src_y = src_y; - info.dest_x = composite_box.x1; - info.dest_y = composite_box.y1; - info.width = composite_box.x2 - composite_box.x1; - info.height = composite_box.y2 - composite_box.y1; - - func (implementation, &info); - - pixman_list_move_to_front (&cache->mru, &glyph->mru_link); - } - } - -out: - if (white_img) - pixman_image_unref (white_img); -} - -/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an - * infinitely big mask image at the position such that the glyph origin point - * is positioned at the (glyphs[i].x, glyphs[i].y) point. - * - * Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source - * image are both aligned with (dest_x, dest_y) in the destination image. Then - * these three images are composited within the - * - * (dest_x, dest_y, dst_x + width, dst_y + height) - * - * rectangle. - * - * TODO: - * - Trim the mask to the destination clip/image? - * - Trim composite region based on sources, when the op ignores 0s. - */ -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) -__attribute__((__force_align_arg_pointer__)) -#endif -PIXMAN_EXPORT void -pixman_composite_glyphs (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *dest, - pixman_format_code_t mask_format, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height, - pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs) -{ - pixman_image_t *mask; - - if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1))) - return; - - if (PIXMAN_FORMAT_A (mask_format) != 0 && - PIXMAN_FORMAT_RGB (mask_format) != 0) - { - pixman_image_set_component_alpha (mask, TRUE); - } - - add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs); - - pixman_image_composite32 (op, src, mask, dest, - src_x, src_y, - 0, 0, - dest_x, dest_y, - width, height); - - pixman_image_unref (mask); -} diff --git a/vendor/pixman/pixman/pixman-gradient-walker.c b/vendor/pixman/pixman/pixman-gradient-walker.c deleted file mode 100644 index b31d5ad7a..000000000 --- a/vendor/pixman/pixman/pixman-gradient-walker.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" - -void -_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, - gradient_t * gradient, - pixman_repeat_t repeat) -{ - walker->num_stops = gradient->n_stops; - walker->stops = gradient->stops; - walker->left_x = 0; - walker->right_x = 0x10000; - walker->a_s = 0.0f; - walker->a_b = 0.0f; - walker->r_s = 0.0f; - walker->r_b = 0.0f; - walker->g_s = 0.0f; - walker->g_b = 0.0f; - walker->b_s = 0.0f; - walker->b_b = 0.0f; - walker->repeat = repeat; - - walker->need_reset = TRUE; -} - -static void -gradient_walker_reset (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t pos) -{ - int64_t x, left_x, right_x; - pixman_color_t *left_c, *right_c; - int n, count = walker->num_stops; - pixman_gradient_stop_t *stops = walker->stops; - float la, lr, lg, lb; - float ra, rr, rg, rb; - float lx, rx; - - if (walker->repeat == PIXMAN_REPEAT_NORMAL) - { - x = (int32_t)pos & 0xffff; - } - else if (walker->repeat == PIXMAN_REPEAT_REFLECT) - { - x = (int32_t)pos & 0xffff; - if ((int32_t)pos & 0x10000) - x = 0x10000 - x; - } - else - { - x = pos; - } - - for (n = 0; n < count; n++) - { - if (x < stops[n].x) - break; - } - - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - - right_x = stops[n].x; - right_c = &stops[n].color; - - if (walker->repeat == PIXMAN_REPEAT_NORMAL) - { - left_x += (pos - x); - right_x += (pos - x); - } - else if (walker->repeat == PIXMAN_REPEAT_REFLECT) - { - if ((int32_t)pos & 0x10000) - { - pixman_color_t *tmp_c; - int32_t tmp_x; - - tmp_x = 0x10000 - right_x; - right_x = 0x10000 - left_x; - left_x = tmp_x; - - tmp_c = right_c; - right_c = left_c; - left_c = tmp_c; - - x = 0x10000 - x; - } - left_x += (pos - x); - right_x += (pos - x); - } - else if (walker->repeat == PIXMAN_REPEAT_NONE) - { - if (n == 0) - right_c = left_c; - else if (n == count) - left_c = right_c; - } - - /* The alpha/red/green/blue channels are scaled to be in [0, 1]. - * This ensures that after premultiplication all channels will - * be in the [0, 1] interval. - */ - la = (left_c->alpha * (1.0f/257.0f)); - lr = (left_c->red * (1.0f/257.0f)); - lg = (left_c->green * (1.0f/257.0f)); - lb = (left_c->blue * (1.0f/257.0f)); - - ra = (right_c->alpha * (1.0f/257.0f)); - rr = (right_c->red * (1.0f/257.0f)); - rg = (right_c->green * (1.0f/257.0f)); - rb = (right_c->blue * (1.0f/257.0f)); - - lx = left_x * (1.0f/65536.0f); - rx = right_x * (1.0f/65536.0f); - - if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX) - { - walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f; - walker->a_b = (la + ra) / 510.0f; - walker->r_b = (lr + rr) / 510.0f; - walker->g_b = (lg + rg) / 510.0f; - walker->b_b = (lb + rb) / 510.0f; - } - else - { - float w_rec = 1.0f / (rx - lx); - - walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f); - walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f); - walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f); - walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f); - - walker->a_s = (ra - la) * w_rec * (1.0f/255.0f); - walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f); - walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f); - walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f); - } - - walker->left_x = left_x; - walker->right_x = right_x; - - walker->need_reset = FALSE; -} - -static argb_t -pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x) -{ - argb_t f; - float y; - - if (walker->need_reset || x < walker->left_x || x >= walker->right_x) - gradient_walker_reset (walker, x); - - y = x * (1.0f / 65536.0f); - - f.a = walker->a_s * y + walker->a_b; - f.r = f.a * (walker->r_s * y + walker->r_b); - f.g = f.a * (walker->g_s * y + walker->g_b); - f.b = f.a * (walker->b_s * y + walker->b_b); - - return f; -} - -static uint32_t -pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x) -{ - argb_t f; - float y; - - if (walker->need_reset || x < walker->left_x || x >= walker->right_x) - gradient_walker_reset (walker, x); - - y = x * (1.0f / 65536.0f); - - /* Instead of [0...1] for ARGB, we want [0...255], - * multiply alpha with 255 and the color channels - * also get multiplied by the alpha multiplier. - * - * We don't use pixman_contract_from_float because it causes a 2x - * slowdown to do so, and the values are already normalized, - * so we don't have to worry about values < 0.f or > 1.f - */ - f.a = 255.f * (walker->a_s * y + walker->a_b); - f.r = f.a * (walker->r_s * y + walker->r_b); - f.g = f.a * (walker->g_s * y + walker->g_b); - f.b = f.a * (walker->b_s * y + walker->b_b); - - return (((uint32_t)(f.a + .5f) << 24) & 0xff000000) | - (((uint32_t)(f.r + .5f) << 16) & 0x00ff0000) | - (((uint32_t)(f.g + .5f) << 8) & 0x0000ff00) | - (((uint32_t)(f.b + .5f) >> 0) & 0x000000ff); -} - -void -_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer) -{ - *buffer = pixman_gradient_walker_pixel_32 (walker, x); -} - -void -_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer) -{ - *(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x); -} - -void -_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer, - uint32_t *end) -{ - register uint32_t color; - - color = pixman_gradient_walker_pixel_32 (walker, x); - while (buffer < end) - *buffer++ = color; -} - -void -_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer, - uint32_t *end) -{ - register argb_t color; - argb_t *buffer_wide = (argb_t *)buffer; - argb_t *end_wide = (argb_t *)end; - - color = pixman_gradient_walker_pixel_float (walker, x); - while (buffer_wide < end_wide) - *buffer_wide++ = color; -} diff --git a/vendor/pixman/pixman/pixman-image.c b/vendor/pixman/pixman/pixman-image.c deleted file mode 100644 index 72796fc9c..000000000 --- a/vendor/pixman/pixman/pixman-image.c +++ /dev/null @@ -1,994 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include - -#include "pixman-private.h" - -static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; - -static void -gradient_property_changed (pixman_image_t *image) -{ - gradient_t *gradient = &image->gradient; - int n = gradient->n_stops; - pixman_gradient_stop_t *stops = gradient->stops; - pixman_gradient_stop_t *begin = &(gradient->stops[-1]); - pixman_gradient_stop_t *end = &(gradient->stops[n]); - - switch (gradient->common.repeat) - { - default: - case PIXMAN_REPEAT_NONE: - begin->x = INT32_MIN; - begin->color = transparent_black; - end->x = INT32_MAX; - end->color = transparent_black; - break; - - case PIXMAN_REPEAT_NORMAL: - begin->x = stops[n - 1].x - pixman_fixed_1; - begin->color = stops[n - 1].color; - end->x = stops[0].x + pixman_fixed_1; - end->color = stops[0].color; - break; - - case PIXMAN_REPEAT_REFLECT: - begin->x = - stops[0].x; - begin->color = stops[0].color; - end->x = pixman_int_to_fixed (2) - stops[n - 1].x; - end->color = stops[n - 1].color; - break; - - case PIXMAN_REPEAT_PAD: - begin->x = INT32_MIN; - begin->color = stops[0].color; - end->x = INT32_MAX; - end->color = stops[n - 1].color; - break; - } -} - -pixman_bool_t -_pixman_init_gradient (gradient_t * gradient, - const pixman_gradient_stop_t *stops, - int n_stops) -{ - return_val_if_fail (n_stops > 0, FALSE); - - /* We allocate two extra stops, one before the beginning of the stop list, - * and one after the end. These stops are initialized to whatever color - * would be used for positions outside the range of the stop list. - * - * This saves a bit of computation in the gradient walker. - * - * The pointer we store in the gradient_t struct still points to the - * first user-supplied struct, so when freeing, we will have to - * subtract one. - */ - gradient->stops = - pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t)); - if (!gradient->stops) - return FALSE; - - gradient->stops += 1; - memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t)); - gradient->n_stops = n_stops; - - gradient->common.property_changed = gradient_property_changed; - - return TRUE; -} - -void -_pixman_image_init (pixman_image_t *image) -{ - image_common_t *common = &image->common; - - pixman_region32_init (&common->clip_region); - - common->alpha_count = 0; - common->have_clip_region = FALSE; - common->clip_sources = FALSE; - common->transform = NULL; - common->repeat = PIXMAN_REPEAT_NONE; - common->filter = PIXMAN_FILTER_NEAREST; - common->filter_params = NULL; - common->n_filter_params = 0; - common->alpha_map = NULL; - common->component_alpha = FALSE; - common->ref_count = 1; - common->property_changed = NULL; - common->client_clip = FALSE; - common->destroy_func = NULL; - common->destroy_data = NULL; - common->dirty = TRUE; -} - -pixman_bool_t -_pixman_image_fini (pixman_image_t *image) -{ - image_common_t *common = (image_common_t *)image; - - common->ref_count--; - - if (common->ref_count == 0) - { - if (image->common.destroy_func) - image->common.destroy_func (image, image->common.destroy_data); - - pixman_region32_fini (&common->clip_region); - - free (common->transform); - free (common->filter_params); - - if (common->alpha_map) - pixman_image_unref ((pixman_image_t *)common->alpha_map); - - if (image->type == LINEAR || - image->type == RADIAL || - image->type == CONICAL) - { - if (image->gradient.stops) - { - /* See _pixman_init_gradient() for an explanation of the - 1 */ - free (image->gradient.stops - 1); - } - - /* This will trigger if someone adds a property_changed - * method to the linear/radial/conical gradient overwriting - * the general one. - */ - assert ( - image->common.property_changed == gradient_property_changed); - } - - if (image->type == BITS && image->bits.free_me) - free (image->bits.free_me); - - return TRUE; - } - - return FALSE; -} - -pixman_image_t * -_pixman_image_allocate (void) -{ - pixman_image_t *image = malloc (sizeof (pixman_image_t)); - - if (image) - _pixman_image_init (image); - - return image; -} - -static void -image_property_changed (pixman_image_t *image) -{ - image->common.dirty = TRUE; -} - -/* Ref Counting */ -PIXMAN_EXPORT pixman_image_t * -pixman_image_ref (pixman_image_t *image) -{ - image->common.ref_count++; - - return image; -} - -/* returns TRUE when the image is freed */ -PIXMAN_EXPORT pixman_bool_t -pixman_image_unref (pixman_image_t *image) -{ - if (_pixman_image_fini (image)) - { - free (image); - return TRUE; - } - - return FALSE; -} - -PIXMAN_EXPORT void -pixman_image_set_destroy_function (pixman_image_t * image, - pixman_image_destroy_func_t func, - void * data) -{ - image->common.destroy_func = func; - image->common.destroy_data = data; -} - -PIXMAN_EXPORT void * -pixman_image_get_destroy_data (pixman_image_t *image) -{ - return image->common.destroy_data; -} - -void -_pixman_image_reset_clip_region (pixman_image_t *image) -{ - image->common.have_clip_region = FALSE; -} - -/* Executive Summary: This function is a no-op that only exists - * for historical reasons. - * - * There used to be a bug in the X server where it would rely on - * out-of-bounds accesses when it was asked to composite with a - * window as the source. It would create a pixman image pointing - * to some bogus position in memory, but then set a clip region - * to the position where the actual bits were. - * - * Due to a bug in old versions of pixman, where it would not clip - * against the image bounds when a clip region was set, this would - * actually work. So when the pixman bug was fixed, a workaround was - * added to allow certain out-of-bound accesses. This function disabled - * those workarounds. - * - * Since 0.21.2, pixman doesn't do these workarounds anymore, so now - * this function is a no-op. - */ -PIXMAN_EXPORT void -pixman_disable_out_of_bounds_workaround (void) -{ -} - -static void -compute_image_info (pixman_image_t *image) -{ - pixman_format_code_t code; - uint32_t flags = 0; - - /* Transform */ - if (!image->common.transform) - { - flags |= (FAST_PATH_ID_TRANSFORM | - FAST_PATH_X_UNIT_POSITIVE | - FAST_PATH_Y_UNIT_ZERO | - FAST_PATH_AFFINE_TRANSFORM); - } - else - { - flags |= FAST_PATH_HAS_TRANSFORM; - - if (image->common.transform->matrix[2][0] == 0 && - image->common.transform->matrix[2][1] == 0 && - image->common.transform->matrix[2][2] == pixman_fixed_1) - { - flags |= FAST_PATH_AFFINE_TRANSFORM; - - if (image->common.transform->matrix[0][1] == 0 && - image->common.transform->matrix[1][0] == 0) - { - if (image->common.transform->matrix[0][0] == -pixman_fixed_1 && - image->common.transform->matrix[1][1] == -pixman_fixed_1) - { - flags |= FAST_PATH_ROTATE_180_TRANSFORM; - } - flags |= FAST_PATH_SCALE_TRANSFORM; - } - else if (image->common.transform->matrix[0][0] == 0 && - image->common.transform->matrix[1][1] == 0) - { - pixman_fixed_t m01 = image->common.transform->matrix[0][1]; - pixman_fixed_t m10 = image->common.transform->matrix[1][0]; - - if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1) - flags |= FAST_PATH_ROTATE_90_TRANSFORM; - else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1) - flags |= FAST_PATH_ROTATE_270_TRANSFORM; - } - } - - if (image->common.transform->matrix[0][0] > 0) - flags |= FAST_PATH_X_UNIT_POSITIVE; - - if (image->common.transform->matrix[1][0] == 0) - flags |= FAST_PATH_Y_UNIT_ZERO; - } - - /* Filter */ - switch (image->common.filter) - { - case PIXMAN_FILTER_NEAREST: - case PIXMAN_FILTER_FAST: - flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); - break; - - case PIXMAN_FILTER_BILINEAR: - case PIXMAN_FILTER_GOOD: - case PIXMAN_FILTER_BEST: - flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); - - /* Here we have a chance to optimize BILINEAR filter to NEAREST if - * they are equivalent for the currently used transformation matrix. - */ - if (flags & FAST_PATH_ID_TRANSFORM) - { - flags |= FAST_PATH_NEAREST_FILTER; - } - else if (flags & FAST_PATH_AFFINE_TRANSFORM) - { - /* Suppose the transform is - * - * [ t00, t01, t02 ] - * [ t10, t11, t12 ] - * [ 0, 0, 1 ] - * - * and the destination coordinates are (n + 0.5, m + 0.5). Then - * the transformed x coordinate is: - * - * tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02 - * = t00 * n + t01 * m + t02 + (t00 + t01) * 0.5 - * - * which implies that if t00, t01 and t02 are all integers - * and (t00 + t01) is odd, then tx will be an integer plus 0.5, - * which means a BILINEAR filter will reduce to NEAREST. The same - * applies in the y direction - */ - pixman_fixed_t (*t)[3] = image->common.transform->matrix; - - if ((pixman_fixed_frac ( - t[0][0] | t[0][1] | t[0][2] | - t[1][0] | t[1][1] | t[1][2]) == 0) && - (pixman_fixed_to_int ( - (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1) - { - /* FIXME: there are some affine-test failures, showing that - * handling of BILINEAR and NEAREST filter is not quite - * equivalent when getting close to 32K for the translation - * components of the matrix. That's likely some bug, but for - * now just skip BILINEAR->NEAREST optimization in this case. - */ - pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); - if (image->common.transform->matrix[0][2] <= magic_limit && - image->common.transform->matrix[1][2] <= magic_limit && - image->common.transform->matrix[0][2] >= -magic_limit && - image->common.transform->matrix[1][2] >= -magic_limit) - { - flags |= FAST_PATH_NEAREST_FILTER; - } - } - } - break; - - case PIXMAN_FILTER_CONVOLUTION: - break; - - case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: - flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER; - break; - - default: - flags |= FAST_PATH_NO_CONVOLUTION_FILTER; - break; - } - - /* Repeat mode */ - switch (image->common.repeat) - { - case PIXMAN_REPEAT_NONE: - flags |= - FAST_PATH_NO_REFLECT_REPEAT | - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_NORMAL_REPEAT; - break; - - case PIXMAN_REPEAT_REFLECT: - flags |= - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_NONE_REPEAT | - FAST_PATH_NO_NORMAL_REPEAT; - break; - - case PIXMAN_REPEAT_PAD: - flags |= - FAST_PATH_NO_REFLECT_REPEAT | - FAST_PATH_NO_NONE_REPEAT | - FAST_PATH_NO_NORMAL_REPEAT; - break; - - default: - flags |= - FAST_PATH_NO_REFLECT_REPEAT | - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_NONE_REPEAT; - break; - } - - /* Component alpha */ - if (image->common.component_alpha) - flags |= FAST_PATH_COMPONENT_ALPHA; - else - flags |= FAST_PATH_UNIFIED_ALPHA; - - flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT); - - /* Type specific checks */ - switch (image->type) - { - case SOLID: - code = PIXMAN_solid; - - if (image->solid.color.alpha == 0xffff) - flags |= FAST_PATH_IS_OPAQUE; - break; - - case BITS: - if (image->bits.width == 1 && - image->bits.height == 1 && - image->common.repeat != PIXMAN_REPEAT_NONE) - { - code = PIXMAN_solid; - } - else - { - code = image->bits.format; - flags |= FAST_PATH_BITS_IMAGE; - } - - if (!PIXMAN_FORMAT_A (image->bits.format) && - PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY && - PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR) - { - flags |= FAST_PATH_SAMPLES_OPAQUE; - - if (image->common.repeat != PIXMAN_REPEAT_NONE) - flags |= FAST_PATH_IS_OPAQUE; - } - - if (image->bits.read_func || image->bits.write_func) - flags &= ~FAST_PATH_NO_ACCESSORS; - - if (PIXMAN_FORMAT_IS_WIDE (image->bits.format)) - flags &= ~FAST_PATH_NARROW_FORMAT; - break; - - case RADIAL: - code = PIXMAN_unknown; - - /* - * As explained in pixman-radial-gradient.c, every point of - * the plane has a valid associated radius (and thus will be - * colored) if and only if a is negative (i.e. one of the two - * circles contains the other one). - */ - - if (image->radial.a >= 0) - break; - - /* Fall through */ - - case CONICAL: - case LINEAR: - code = PIXMAN_unknown; - - if (image->common.repeat != PIXMAN_REPEAT_NONE) - { - int i; - - flags |= FAST_PATH_IS_OPAQUE; - for (i = 0; i < image->gradient.n_stops; ++i) - { - if (image->gradient.stops[i].color.alpha != 0xffff) - { - flags &= ~FAST_PATH_IS_OPAQUE; - break; - } - } - } - break; - - default: - code = PIXMAN_unknown; - break; - } - - /* Alpha maps are only supported for BITS images, so it's always - * safe to ignore their presense for non-BITS images - */ - if (!image->common.alpha_map || image->type != BITS) - { - flags |= FAST_PATH_NO_ALPHA_MAP; - } - else - { - if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format)) - flags &= ~FAST_PATH_NARROW_FORMAT; - } - - /* Both alpha maps and convolution filters can introduce - * non-opaqueness in otherwise opaque images. Also - * an image with component alpha turned on is only opaque - * if all channels are opaque, so we simply turn it off - * unconditionally for those images. - */ - if (image->common.alpha_map || - image->common.filter == PIXMAN_FILTER_CONVOLUTION || - image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION || - image->common.component_alpha) - { - flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE); - } - - image->common.flags = flags; - image->common.extended_format_code = code; -} - -void -_pixman_image_validate (pixman_image_t *image) -{ - if (image->common.dirty) - { - compute_image_info (image); - - /* It is important that property_changed is - * called *after* compute_image_info() because - * property_changed() can make use of the flags - * to set up accessors etc. - */ - if (image->common.property_changed) - image->common.property_changed (image); - - image->common.dirty = FALSE; - } - - if (image->common.alpha_map) - _pixman_image_validate ((pixman_image_t *)image->common.alpha_map); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_set_clip_region32 (pixman_image_t * image, - const pixman_region32_t *region) -{ - image_common_t *common = (image_common_t *)image; - pixman_bool_t result; - - if (region) - { - if ((result = pixman_region32_copy (&common->clip_region, region))) - image->common.have_clip_region = TRUE; - } - else - { - _pixman_image_reset_clip_region (image); - - result = TRUE; - } - - image_property_changed (image); - - return result; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_set_clip_region (pixman_image_t * image, - const pixman_region16_t *region) -{ - image_common_t *common = (image_common_t *)image; - pixman_bool_t result; - - if (region) - { - if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region))) - image->common.have_clip_region = TRUE; - } - else - { - _pixman_image_reset_clip_region (image); - - result = TRUE; - } - - image_property_changed (image); - - return result; -} - -PIXMAN_EXPORT void -pixman_image_set_has_client_clip (pixman_image_t *image, - pixman_bool_t client_clip) -{ - image->common.client_clip = client_clip; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_set_transform (pixman_image_t * image, - const pixman_transform_t *transform) -{ - static const pixman_transform_t id = - { - { { pixman_fixed_1, 0, 0 }, - { 0, pixman_fixed_1, 0 }, - { 0, 0, pixman_fixed_1 } } - }; - - image_common_t *common = (image_common_t *)image; - pixman_bool_t result; - - if (common->transform == transform) - return TRUE; - - if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) - { - free (common->transform); - common->transform = NULL; - result = TRUE; - - goto out; - } - - if (common->transform && - memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0) - { - return TRUE; - } - - if (common->transform == NULL) - common->transform = malloc (sizeof (pixman_transform_t)); - - if (common->transform == NULL) - { - result = FALSE; - - goto out; - } - - memcpy (common->transform, transform, sizeof(pixman_transform_t)); - - result = TRUE; - -out: - image_property_changed (image); - - return result; -} - -PIXMAN_EXPORT void -pixman_image_set_repeat (pixman_image_t *image, - pixman_repeat_t repeat) -{ - if (image->common.repeat == repeat) - return; - - image->common.repeat = repeat; - - image_property_changed (image); -} - -PIXMAN_EXPORT void -pixman_image_set_dither (pixman_image_t *image, - pixman_dither_t dither) -{ - if (image->type == BITS) - { - if (image->bits.dither == dither) - return; - - image->bits.dither = dither; - - image_property_changed (image); - } -} - -PIXMAN_EXPORT void -pixman_image_set_dither_offset (pixman_image_t *image, - int offset_x, - int offset_y) -{ - if (image->type == BITS) - { - if (image->bits.dither_offset_x == offset_x && - image->bits.dither_offset_y == offset_y) - { - return; - } - - image->bits.dither_offset_x = offset_x; - image->bits.dither_offset_y = offset_y; - - image_property_changed (image); - } -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_set_filter (pixman_image_t * image, - pixman_filter_t filter, - const pixman_fixed_t *params, - int n_params) -{ - image_common_t *common = (image_common_t *)image; - pixman_fixed_t *new_params; - - if (params == common->filter_params && filter == common->filter) - return TRUE; - - if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION) - { - int width = pixman_fixed_to_int (params[0]); - int height = pixman_fixed_to_int (params[1]); - int x_phase_bits = pixman_fixed_to_int (params[2]); - int y_phase_bits = pixman_fixed_to_int (params[3]); - int n_x_phases = (1 << x_phase_bits); - int n_y_phases = (1 << y_phase_bits); - - return_val_if_fail ( - n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE); - } - - new_params = NULL; - if (params) - { - new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t)); - if (!new_params) - return FALSE; - - memcpy (new_params, - params, n_params * sizeof (pixman_fixed_t)); - } - - common->filter = filter; - - if (common->filter_params) - free (common->filter_params); - - common->filter_params = new_params; - common->n_filter_params = n_params; - - image_property_changed (image); - return TRUE; -} - -PIXMAN_EXPORT void -pixman_image_set_source_clipping (pixman_image_t *image, - pixman_bool_t clip_sources) -{ - if (image->common.clip_sources == clip_sources) - return; - - image->common.clip_sources = clip_sources; - - image_property_changed (image); -} - -/* Unlike all the other property setters, this function does not - * copy the content of indexed. Doing this copying is simply - * way, way too expensive. - */ -PIXMAN_EXPORT void -pixman_image_set_indexed (pixman_image_t * image, - const pixman_indexed_t *indexed) -{ - bits_image_t *bits = (bits_image_t *)image; - - if (bits->indexed == indexed) - return; - - bits->indexed = indexed; - - image_property_changed (image); -} - -PIXMAN_EXPORT void -pixman_image_set_alpha_map (pixman_image_t *image, - pixman_image_t *alpha_map, - int16_t x, - int16_t y) -{ - image_common_t *common = (image_common_t *)image; - - return_if_fail (!alpha_map || alpha_map->type == BITS); - - if (alpha_map && common->alpha_count > 0) - { - /* If this image is being used as an alpha map itself, - * then you can't give it an alpha map of its own. - */ - return; - } - - if (alpha_map && alpha_map->common.alpha_map) - { - /* If the image has an alpha map of its own, - * then it can't be used as an alpha map itself - */ - return; - } - - if (common->alpha_map != (bits_image_t *)alpha_map) - { - if (common->alpha_map) - { - common->alpha_map->common.alpha_count--; - - pixman_image_unref ((pixman_image_t *)common->alpha_map); - } - - if (alpha_map) - { - common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map); - - common->alpha_map->common.alpha_count++; - } - else - { - common->alpha_map = NULL; - } - } - - common->alpha_origin_x = x; - common->alpha_origin_y = y; - - image_property_changed (image); -} - -PIXMAN_EXPORT void -pixman_image_set_component_alpha (pixman_image_t *image, - pixman_bool_t component_alpha) -{ - if (image->common.component_alpha == component_alpha) - return; - - image->common.component_alpha = component_alpha; - - image_property_changed (image); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_get_component_alpha (pixman_image_t *image) -{ - return image->common.component_alpha; -} - -PIXMAN_EXPORT void -pixman_image_set_accessors (pixman_image_t * image, - pixman_read_memory_func_t read_func, - pixman_write_memory_func_t write_func) -{ - return_if_fail (image != NULL); - - if (image->type == BITS) - { - /* Accessors only work for <= 32 bpp. */ - if (PIXMAN_FORMAT_BPP(image->bits.format) > 32) - return_if_fail (!read_func && !write_func); - - image->bits.read_func = read_func; - image->bits.write_func = write_func; - - image_property_changed (image); - } -} - -PIXMAN_EXPORT uint32_t * -pixman_image_get_data (pixman_image_t *image) -{ - if (image->type == BITS) - return image->bits.bits; - - return NULL; -} - -PIXMAN_EXPORT int -pixman_image_get_width (pixman_image_t *image) -{ - if (image->type == BITS) - return image->bits.width; - - return 0; -} - -PIXMAN_EXPORT int -pixman_image_get_height (pixman_image_t *image) -{ - if (image->type == BITS) - return image->bits.height; - - return 0; -} - -PIXMAN_EXPORT int -pixman_image_get_stride (pixman_image_t *image) -{ - if (image->type == BITS) - return image->bits.rowstride * (int) sizeof (uint32_t); - - return 0; -} - -PIXMAN_EXPORT int -pixman_image_get_depth (pixman_image_t *image) -{ - if (image->type == BITS) - return PIXMAN_FORMAT_DEPTH (image->bits.format); - - return 0; -} - -PIXMAN_EXPORT pixman_format_code_t -pixman_image_get_format (pixman_image_t *image) -{ - if (image->type == BITS) - return image->bits.format; - - return PIXMAN_null; -} - -uint32_t -_pixman_image_get_solid (pixman_implementation_t *imp, - pixman_image_t * image, - pixman_format_code_t format) -{ - uint32_t result; - - if (image->type == SOLID) - { - result = image->solid.color_32; - } - else if (image->type == BITS) - { - if (image->bits.format == PIXMAN_a8r8g8b8) - result = image->bits.bits[0]; - else if (image->bits.format == PIXMAN_x8r8g8b8) - result = image->bits.bits[0] | 0xff000000; - else if (image->bits.format == PIXMAN_a8) - result = (uint32_t)(*(uint8_t *)image->bits.bits) << 24; - else - goto otherwise; - } - else - { - pixman_iter_t iter; - - otherwise: - _pixman_implementation_iter_init ( - imp, &iter, image, 0, 0, 1, 1, - (uint8_t *)&result, - ITER_NARROW | ITER_SRC, image->common.flags); - - result = *iter.get_scanline (&iter, NULL); - - if (iter.fini) - iter.fini (&iter); - } - - /* If necessary, convert RGB <--> BGR. */ - if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB - && PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB) - { - result = (((result & 0xff000000) >> 0) | - ((result & 0x00ff0000) >> 16) | - ((result & 0x0000ff00) >> 0) | - ((result & 0x000000ff) << 16)); - } - - return result; -} diff --git a/vendor/pixman/pixman/pixman-implementation.c b/vendor/pixman/pixman/pixman-implementation.c deleted file mode 100644 index 69fa70bc3..000000000 --- a/vendor/pixman/pixman/pixman-implementation.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Copyright © 2009 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include "pixman-private.h" - -pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *fallback, - const pixman_fast_path_t *fast_paths) -{ - pixman_implementation_t *imp; - - assert (fast_paths); - - if ((imp = malloc (sizeof (pixman_implementation_t)))) - { - pixman_implementation_t *d; - - memset (imp, 0, sizeof *imp); - - imp->fallback = fallback; - imp->fast_paths = fast_paths; - - /* Make sure the whole fallback chain has the right toplevel */ - for (d = imp; d != NULL; d = d->fallback) - d->toplevel = imp; - } - - return imp; -} - -#define N_CACHED_FAST_PATHS 8 - -typedef struct -{ - struct - { - pixman_implementation_t * imp; - pixman_fast_path_t fast_path; - } cache [N_CACHED_FAST_PATHS]; -} cache_t; - -PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache) - -static void -dummy_composite_rect (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ -} - -void -_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, - pixman_op_t op, - pixman_format_code_t src_format, - uint32_t src_flags, - pixman_format_code_t mask_format, - uint32_t mask_flags, - pixman_format_code_t dest_format, - uint32_t dest_flags, - pixman_implementation_t **out_imp, - pixman_composite_func_t *out_func) -{ - pixman_implementation_t *imp; - cache_t *cache; - int i; - - /* Check cache for fast paths */ - cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); - - for (i = 0; i < N_CACHED_FAST_PATHS; ++i) - { - const pixman_fast_path_t *info = &(cache->cache[i].fast_path); - - /* Note that we check for equality here, not whether - * the cached fast path matches. This is to prevent - * us from selecting an overly general fast path - * when a more specific one would work. - */ - if (info->op == op && - info->src_format == src_format && - info->mask_format == mask_format && - info->dest_format == dest_format && - info->src_flags == src_flags && - info->mask_flags == mask_flags && - info->dest_flags == dest_flags && - info->func) - { - *out_imp = cache->cache[i].imp; - *out_func = cache->cache[i].fast_path.func; - - goto update_cache; - } - } - - for (imp = toplevel; imp != NULL; imp = imp->fallback) - { - const pixman_fast_path_t *info = imp->fast_paths; - - while (info->op != PIXMAN_OP_NONE) - { - if ((info->op == op || info->op == PIXMAN_OP_any) && - /* Formats */ - ((info->src_format == src_format) || - (info->src_format == PIXMAN_any)) && - ((info->mask_format == mask_format) || - (info->mask_format == PIXMAN_any)) && - ((info->dest_format == dest_format) || - (info->dest_format == PIXMAN_any)) && - /* Flags */ - (info->src_flags & src_flags) == info->src_flags && - (info->mask_flags & mask_flags) == info->mask_flags && - (info->dest_flags & dest_flags) == info->dest_flags) - { - *out_imp = imp; - *out_func = info->func; - - /* Set i to the last spot in the cache so that the - * move-to-front code below will work - */ - i = N_CACHED_FAST_PATHS - 1; - - goto update_cache; - } - - ++info; - } - } - - /* We should never reach this point */ - _pixman_log_error ( - FUNC, - "No composite function found\n" - "\n" - "The most likely cause of this is that this system has issues with\n" - "thread local storage\n"); - - *out_imp = NULL; - *out_func = dummy_composite_rect; - return; - -update_cache: - if (i) - { - while (i--) - cache->cache[i + 1] = cache->cache[i]; - - cache->cache[0].imp = *out_imp; - cache->cache[0].fast_path.op = op; - cache->cache[0].fast_path.src_format = src_format; - cache->cache[0].fast_path.src_flags = src_flags; - cache->cache[0].fast_path.mask_format = mask_format; - cache->cache[0].fast_path.mask_flags = mask_flags; - cache->cache[0].fast_path.dest_format = dest_format; - cache->cache[0].fast_path.dest_flags = dest_flags; - cache->cache[0].fast_path.func = *out_func; - } -} - -static void -dummy_combine (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ -} - -pixman_combine_32_func_t -_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, - pixman_op_t op, - pixman_bool_t component_alpha, - pixman_bool_t narrow) -{ - while (imp) - { - pixman_combine_32_func_t f = NULL; - - switch ((narrow << 1) | component_alpha) - { - case 0: /* not narrow, not component alpha */ - f = (pixman_combine_32_func_t)imp->combine_float[op]; - break; - - case 1: /* not narrow, component_alpha */ - f = (pixman_combine_32_func_t)imp->combine_float_ca[op]; - break; - - case 2: /* narrow, not component alpha */ - f = imp->combine_32[op]; - break; - - case 3: /* narrow, component_alpha */ - f = imp->combine_32_ca[op]; - break; - } - - if (f) - return f; - - imp = imp->fallback; - } - - /* We should never reach this point */ - _pixman_log_error (FUNC, "No known combine function\n"); - return dummy_combine; -} - -pixman_bool_t -_pixman_implementation_blt (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - while (imp) - { - if (imp->blt && - (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y, - width, height)) - { - return TRUE; - } - - imp = imp->fallback; - } - - return FALSE; -} - -pixman_bool_t -_pixman_implementation_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - while (imp) - { - if (imp->fill && - ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler))) - { - return TRUE; - } - - imp = imp->fallback; - } - - return FALSE; -} - -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; -} - -void -_pixman_implementation_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) -{ - pixman_format_code_t format; - - iter->image = image; - iter->buffer = (uint32_t *)buffer; - iter->x = x; - iter->y = y; - iter->width = width; - iter->height = height; - iter->iter_flags = iter_flags; - iter->image_flags = image_flags; - iter->fini = NULL; - - if (!iter->image) - { - iter->get_scanline = get_scanline_null; - return; - } - - format = iter->image->common.extended_format_code; - - while (imp) - { - if (imp->iter_info) - { - const pixman_iter_info_t *info; - - for (info = imp->iter_info; info->format != PIXMAN_null; ++info) - { - if ((info->format == PIXMAN_any || info->format == format) && - (info->image_flags & image_flags) == info->image_flags && - (info->iter_flags & iter_flags) == info->iter_flags) - { - iter->get_scanline = info->get_scanline; - iter->write_back = info->write_back; - - if (info->initializer) - info->initializer (iter, info); - return; - } - } - } - - imp = imp->fallback; - } -} - -pixman_bool_t -_pixman_disabled (const char *name) -{ - const char *env; - - if ((env = getenv ("PIXMAN_DISABLE"))) - { - do - { - const char *end; - int len; - - if ((end = strchr (env, ' '))) - len = end - env; - else - len = strlen (env); - - if (strlen (name) == len && strncmp (name, env, len) == 0) - { - printf ("pixman: Disabled %s implementation\n", name); - return TRUE; - } - - env += len; - } - while (*env++); - } - - return FALSE; -} - -static const pixman_fast_path_t empty_fast_path[] = -{ - { PIXMAN_OP_NONE } -}; - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ - pixman_implementation_t *imp; - - imp = _pixman_implementation_create_general(); - - if (!_pixman_disabled ("fast")) - imp = _pixman_implementation_create_fast_path (imp); - - imp = _pixman_x86_get_implementations (imp); - imp = _pixman_arm_get_implementations (imp); - imp = _pixman_ppc_get_implementations (imp); - imp = _pixman_mips_get_implementations (imp); - - imp = _pixman_implementation_create_noop (imp); - - if (_pixman_disabled ("wholeops")) - { - pixman_implementation_t *cur; - - /* Disable all whole-operation paths except the general one, - * so that optimized iterators are used as much as possible. - */ - for (cur = imp; cur->fallback; cur = cur->fallback) - cur->fast_paths = empty_fast_path; - } - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-inlines.h b/vendor/pixman/pixman/pixman-inlines.h deleted file mode 100644 index f785910f8..000000000 --- a/vendor/pixman/pixman/pixman-inlines.h +++ /dev/null @@ -1,1365 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -/* Flags describing input parameters to fast path macro template. - * Turning on some flag values may indicate that - * "some property X is available so template can use this" or - * "some property X should be handled by template". - * - * FLAG_HAVE_SOLID_MASK - * Input mask is solid so template should handle this. - * - * FLAG_HAVE_NON_SOLID_MASK - * Input mask is bits mask so template should handle this. - * - * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually - * exclusive. (It's not allowed to turn both flags on) - */ -#define FLAG_NONE (0) -#define FLAG_HAVE_SOLID_MASK (1 << 1) -#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) - -/* To avoid too short repeated scanline function calls, extend source - * scanlines having width less than below constant value. - */ -#define REPEAT_NORMAL_MIN_WIDTH 64 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (*c < 0 || *c >= size) - return FALSE; - } - else if (repeat == PIXMAN_REPEAT_NORMAL) - { - while (*c >= size) - *c -= size; - while (*c < 0) - *c += size; - } - else if (repeat == PIXMAN_REPEAT_PAD) - { - *c = CLIP (*c, 0, size - 1); - } - else /* REFLECT */ - { - *c = MOD (*c, size * 2); - if (*c >= size) - *c = size * 2 - *c - 1; - } - return TRUE; -} - -static force_inline int -pixman_fixed_to_bilinear_weight (pixman_fixed_t x) -{ - return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & - ((1 << BILINEAR_INTERPOLATION_BITS) - 1); -} - -#if BILINEAR_INTERPOLATION_BITS <= 4 -/* Inspired by Filter_32_opaque from Skia */ -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - int distxy, distxiy, distixy, distixiy; - uint32_t lo, hi; - - distx <<= (4 - BILINEAR_INTERPOLATION_BITS); - disty <<= (4 - BILINEAR_INTERPOLATION_BITS); - - distxy = distx * disty; - distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ - distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ - distixiy = - 16 * 16 - (disty << 4) - - (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ - - lo = (tl & 0xff00ff) * distixiy; - hi = ((tl >> 8) & 0xff00ff) * distixiy; - - lo += (tr & 0xff00ff) * distxiy; - hi += ((tr >> 8) & 0xff00ff) * distxiy; - - lo += (bl & 0xff00ff) * distixy; - hi += ((bl >> 8) & 0xff00ff) * distixy; - - lo += (br & 0xff00ff) * distxy; - hi += ((br >> 8) & 0xff00ff) * distxy; - - return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); -} - -#else -#if SIZEOF_LONG > 4 - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - uint64_t distxy, distxiy, distixy, distixiy; - uint64_t tl64, tr64, bl64, br64; - uint64_t f, r; - - distx <<= (8 - BILINEAR_INTERPOLATION_BITS); - disty <<= (8 - BILINEAR_INTERPOLATION_BITS); - - distxy = distx * disty; - distxiy = distx * (256 - disty); - distixy = (256 - distx) * disty; - distixiy = (256 - distx) * (256 - disty); - - /* Alpha and Blue */ - tl64 = tl & 0xff0000ff; - tr64 = tr & 0xff0000ff; - bl64 = bl & 0xff0000ff; - br64 = br & 0xff0000ff; - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r = f & 0x0000ff0000ff0000ull; - - /* Red and Green */ - tl64 = tl; - tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); - - tr64 = tr; - tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); - - bl64 = bl; - bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); - - br64 = br; - br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); - - return (uint32_t)(r >> 16); -} - -#else - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - int distxy, distxiy, distixy, distixiy; - uint32_t f, r; - - distx <<= (8 - BILINEAR_INTERPOLATION_BITS); - disty <<= (8 - BILINEAR_INTERPOLATION_BITS); - - distxy = distx * disty; - distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ - distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ - distixiy = - 256 * 256 - (disty << 8) - - (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ - - /* Blue */ - r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - - /* Green */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - tl >>= 16; - tr >>= 16; - bl >>= 16; - br >>= 16; - r >>= 16; - - /* Red */ - f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - r |= f & 0x00ff0000; - - /* Alpha */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - return r; -} - -#endif -#endif // BILINEAR_INTERPOLATION_BITS <= 4 - -static force_inline argb_t -bilinear_interpolation_float (argb_t tl, argb_t tr, - argb_t bl, argb_t br, - float distx, float disty) -{ - float distxy, distxiy, distixy, distixiy; - argb_t r; - - distxy = distx * disty; - distxiy = distx * (1.f - disty); - distixy = (1.f - distx) * disty; - distixiy = (1.f - distx) * (1.f - disty); - - r.a = tl.a * distixiy + tr.a * distxiy + - bl.a * distixy + br.a * distxy; - r.r = tl.r * distixiy + tr.r * distxiy + - bl.r * distixy + br.r * distxy; - r.g = tl.g * distixiy + tr.g * distxiy + - bl.g * distixy + br.g * distxy; - r.b = tl.b * distixiy + tr.b * distxiy + - bl.b * distixy + br.b * distxy; - - return r; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - * is probably excessive in many cases. This particular function - * may need its own correctness test and performance tuning. - */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * width, - int32_t * left_pad, - int32_t * right_pad) -{ - int64_t max_vx = (int64_t) source_image_width << 16; - int64_t tmp; - if (vx < 0) - { - tmp = ((int64_t) unit_x - 1 - vx) / unit_x; - if (tmp > *width) - { - *left_pad = *width; - *width = 0; - } - else - { - *left_pad = (int32_t) tmp; - *width -= (int32_t) tmp; - } - } - else - { - *left_pad = 0; - } - tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; - if (tmp < 0) - { - *right_pad = *width; - *width = 0; - } - else if (tmp >= *width) - { - *right_pad = 0; - } - else - { - *right_pad = *width - (int32_t) tmp; - *width = (int32_t) tmp; - } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with - 565 source, but it is needed to build. */ -#define GET_0565_ALPHA(s) 0xff -#define GET_x888_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static force_inline void \ -scanline_func_name (dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t src_width_fixed, \ - pixman_bool_t fully_transparent_src) \ -{ \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int x1, x2; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ - return; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - while ((w -= 2) >= 0) \ - { \ - x1 = pixman_fixed_to_int (vx); \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= 0) \ - vx -= src_width_fixed; \ - } \ - s1 = *(src + x1); \ - \ - x2 = pixman_fixed_to_int (vx); \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= 0) \ - vx -= src_width_fixed; \ - } \ - s2 = *(src + x2); \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ - \ - if (a1 == 0xff) \ - { \ - *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \ - s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = convert_8888_to_ ## DST_FORMAT (d); \ - } \ - dst++; \ - \ - if (a2 == 0xff) \ - { \ - *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ - } \ - else if (s2) \ - { \ - d = convert_## DST_FORMAT ## _to_8888 (*dst); \ - s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \ - a2 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = convert_8888_to_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ - *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ - } \ - } \ - \ - if (w & 1) \ - { \ - x1 = pixman_fixed_to_int (vx); \ - s1 = *(src + x1); \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - \ - if (a1 == 0xff) \ - { \ - *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = convert_## DST_FORMAT ## _to_8888 (*dst); \ - s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = convert_8888_to_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ - } \ - } \ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ -static void \ -fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y; \ - pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \ - pixman_fixed_t max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, right_pad; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ - if (have_mask) \ - { \ - if (mask_is_solid) \ - solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ - else \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - max_vy = pixman_int_to_fixed (src_image->bits.height); \ - \ - /* Clamp repeating positions inside the actual samples */ \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ - &width, &left_pad, &right_pad); \ - vx += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - if (have_mask && !mask_is_solid) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y = pixman_fixed_to_int (vy); \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, \ - src + src_image->bits.width - src_image->bits.width + 1, \ - left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src + src_image->bits.width, width, \ - vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ - dst + left_pad + width, src + src_image->bits.width, \ - right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - static const src_type_t zero[1] = { 0 }; \ - if (y < 0 || y >= src_image->bits.height) \ - { \ - scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \ - -pixman_fixed_e, 0, src_width_fixed, TRUE); \ - continue; \ - } \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, zero + 1, left_pad, \ - -pixman_fixed_e, 0, src_width_fixed, TRUE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src + src_image->bits.width, width, \ - vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ - dst + left_pad + width, zero + 1, right_pad, \ - -pixman_fixed_e, 0, src_width_fixed, TRUE); \ - } \ - } \ - else \ - { \ - src = src_first_line + src_stride * y; \ - scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \ - unit_x, src_width_fixed, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) - -#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - static force_inline void \ - scanline_func##scale_func_name##_wrapper ( \ - const uint8_t *mask, \ - dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t fully_transparent_src) \ - { \ - scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ - } \ - FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ - src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) - -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ - dst_type_t, repeat_mode) - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ - FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ - OP, repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ - scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - src_type_t, dst_type_t, repeat_mode) - - -#define SCALED_NEAREST_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) - -/*****************************************************************************/ - -/* - * Identify 5 zones in each scanline for bilinear scaling. Depending on - * whether 2 pixels to be interpolated are fetched from the image itself, - * from the padding area around it or from both image and padding area. - */ -static force_inline void -bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * left_pad, - int32_t * left_tz, - int32_t * width, - int32_t * right_tz, - int32_t * right_pad) -{ - int width1 = *width, left_pad1, right_pad1; - int width2 = *width, left_pad2, right_pad2; - - pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, - &width1, &left_pad1, &right_pad1); - pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, - unit_x, &width2, &left_pad2, &right_pad2); - - *left_pad = left_pad2; - *left_tz = left_pad1 - left_pad2; - *right_tz = right_pad2 - right_pad1; - *right_pad = right_pad1; - *width -= *left_pad + *left_tz + *right_tz + *right_pad; -} - -/* - * Main loop template for single pass bilinear scaling. It needs to be - * provided with 'scanline_func' which should do the compositing operation. - * The needed function has the following prototype: - * - * scanline_func (dst_type_t * dst, - * const mask_type_ * mask, - * const src_type_t * src_top, - * const src_type_t * src_bottom, - * int32_t width, - * int weight_top, - * int weight_bottom, - * pixman_fixed_t vx, - * pixman_fixed_t unit_x, - * pixman_fixed_t max_vx, - * pixman_bool_t zero_src) - * - * Where: - * dst - destination scanline buffer for storing results - * mask - mask buffer (or single value for solid mask) - * src_top, src_bottom - two source scanlines - * width - number of pixels to process - * weight_top - weight of the top row for interpolation - * weight_bottom - weight of the bottom row for interpolation - * vx - initial position for fetching the first pair of - * pixels from the source buffer - * unit_x - position increment needed to move to the next pair - * of pixels - * max_vx - image size as a fixed point value, can be used for - * implementing NORMAL repeat (when it is supported) - * zero_src - boolean hint variable, which is set to TRUE when - * all source pixels are fetched from zero padding - * zone for NONE repeat - * - * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to - * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that - * for NONE repeat when handling fuzzy antialiased top or bottom image - * edges. Also both top and bottom weight variables are guaranteed to - * have value, which is less than BILINEAR_INTERPOLATION_RANGE. - * For example, the weights can fit into unsigned byte or be used - * with 8-bit SIMD multiplication instructions for 8-bit interpolation - * precision. - */ -#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, flags) \ -static void \ -fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y1, y2; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, left_tz, right_tz, right_pad; \ - \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - int src_width; \ - pixman_fixed_t src_width_fixed; \ - int max_x; \ - pixman_bool_t need_src_extension; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ - if (flags & FLAG_HAVE_SOLID_MASK) \ - { \ - solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ - mask_stride = 0; \ - } \ - else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - { \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - v.vector[0] -= pixman_fixed_1 / 2; \ - v.vector[1] -= pixman_fixed_1 / 2; \ - \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ - &left_pad, &left_tz, &width, &right_tz, &right_pad); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - /* PAD repeat does not need special handling for 'transition zones' and */ \ - /* they can be combined with 'padding zones' safely */ \ - left_pad += left_tz; \ - right_pad += right_tz; \ - left_tz = right_tz = 0; \ - } \ - v.vector[0] += left_pad * unit_x; \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - vx = v.vector[0]; \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ - max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \ - \ - if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ - { \ - src_width = 0; \ - \ - while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ - src_width += src_image->bits.width; \ - \ - need_src_extension = TRUE; \ - } \ - else \ - { \ - src_width = src_image->bits.width; \ - need_src_extension = FALSE; \ - } \ - \ - src_width_fixed = pixman_int_to_fixed (src_width); \ - } \ - \ - while (--height >= 0) \ - { \ - int weight1, weight2; \ - dst = dst_line; \ - dst_line += dst_stride; \ - vx = v.vector[0]; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y1 = pixman_fixed_to_int (vy); \ - weight2 = pixman_fixed_to_bilinear_weight (vy); \ - if (weight2) \ - { \ - /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \ - y2 = y1 + 1; \ - weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \ - } \ - else \ - { \ - /* set both top and bottom row to the same scanline and tweak weights */ \ - y2 = y1; \ - weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \ - } \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ - repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[0]; \ - buf2[0] = buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ - dst += left_pad; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_pad; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += width; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ - buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ - if (y1 < 0) \ - { \ - weight1 = 0; \ - y1 = 0; \ - } \ - if (y1 >= src_image->bits.height) \ - { \ - weight1 = 0; \ - y1 = src_image->bits.height - 1; \ - } \ - if (y2 < 0) \ - { \ - weight2 = 0; \ - y2 = 0; \ - } \ - if (y2 >= src_image->bits.height) \ - { \ - weight2 = 0; \ - y2 = src_image->bits.height - 1; \ - } \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ - dst += left_pad; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_pad; \ - } \ - if (left_tz > 0) \ - { \ - buf1[0] = 0; \ - buf1[1] = src1[0]; \ - buf2[0] = 0; \ - buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_tz, weight1, weight2, \ - pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += left_tz; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_tz; \ - vx += left_tz * unit_x; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += width; \ - vx += width * unit_x; \ - } \ - if (right_tz > 0) \ - { \ - buf1[0] = src1[src_image->bits.width - 1]; \ - buf1[1] = 0; \ - buf2[0] = src2[src_image->bits.width - 1]; \ - buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_tz, weight1, weight2, \ - pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += right_tz; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += right_tz; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - int32_t num_pixels; \ - int32_t width_remain; \ - src_type_t * src_line_top; \ - src_type_t * src_line_bottom; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ - src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ - int i, j; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ - repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ - src_line_top = src_first_line + src_stride * y1; \ - src_line_bottom = src_first_line + src_stride * y2; \ - \ - if (need_src_extension) \ - { \ - for (i=0; ibits.width; j++, i++) \ - { \ - extended_src_line0[i] = src_line_top[j]; \ - extended_src_line1[i] = src_line_bottom[j]; \ - } \ - } \ - \ - src_line_top = &extended_src_line0[0]; \ - src_line_bottom = &extended_src_line1[0]; \ - } \ - \ - /* Top & Bottom wrap around buffer */ \ - buf1[0] = src_line_top[src_width - 1]; \ - buf1[1] = src_line_top[0]; \ - buf2[0] = src_line_bottom[src_width - 1]; \ - buf2[1] = src_line_bottom[0]; \ - \ - width_remain = width; \ - \ - while (width_remain > 0) \ - { \ - /* We use src_width_fixed because it can make vx in original source range */ \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ - \ - /* Wrap around part */ \ - if (pixman_fixed_to_int (vx) == src_width - 1) \ - { \ - /* for positive unit_x \ - * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ - * \ - * vx is in range [0, src_width_fixed - pixman_fixed_e] \ - * So we are safe from overflow. \ - */ \ - num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ - \ - if (num_pixels > width_remain) \ - num_pixels = width_remain; \ - \ - scanline_func (dst, mask, buf1, buf2, num_pixels, \ - weight1, weight2, pixman_fixed_frac(vx), \ - unit_x, src_width_fixed, FALSE); \ - \ - width_remain -= num_pixels; \ - vx += num_pixels * unit_x; \ - dst += num_pixels; \ - \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += num_pixels; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ - } \ - \ - /* Normal scanline composite */ \ - if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ - { \ - /* for positive unit_x \ - * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ - * \ - * vx is in range [0, src_width_fixed - pixman_fixed_e] \ - * So we are safe from overflow here. \ - */ \ - num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ - / unit_x) + 1; \ - \ - if (num_pixels > width_remain) \ - num_pixels = width_remain; \ - \ - scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ - weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ - \ - width_remain -= num_pixels; \ - vx += num_pixels * unit_x; \ - dst += num_pixels; \ - \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += num_pixels; \ - } \ - } \ - } \ - else \ - { \ - scanline_func (dst, mask, src_first_line + src_stride * y1, \ - src_first_line + src_stride * y2, width, \ - weight1, weight2, vx, unit_x, max_vx, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, flags) \ - FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ - dst_type_t, repeat_mode, flags) - -#define SCALED_BILINEAR_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_BILINEAR_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) - -#endif diff --git a/vendor/pixman/pixman/pixman-linear-gradient.c b/vendor/pixman/pixman/pixman-linear-gradient.c deleted file mode 100644 index 014b69ceb..000000000 --- a/vendor/pixman/pixman/pixman-linear-gradient.c +++ /dev/null @@ -1,292 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include "pixman-private.h" - -static pixman_bool_t -linear_gradient_is_horizontal (pixman_image_t *image, - int x, - int y, - int width, - int height) -{ - linear_gradient_t *linear = (linear_gradient_t *)image; - pixman_vector_t v; - pixman_fixed_32_32_t l; - pixman_fixed_48_16_t dx, dy; - double inc; - - if (image->common.transform) - { - /* projective transformation */ - if (image->common.transform->matrix[2][0] != 0 || - image->common.transform->matrix[2][1] != 0 || - image->common.transform->matrix[2][2] == 0) - { - return FALSE; - } - - v.vector[0] = image->common.transform->matrix[0][1]; - v.vector[1] = image->common.transform->matrix[1][1]; - v.vector[2] = image->common.transform->matrix[2][2]; - } - else - { - v.vector[0] = 0; - v.vector[1] = pixman_fixed_1; - v.vector[2] = pixman_fixed_1; - } - - dx = linear->p2.x - linear->p1.x; - dy = linear->p2.y - linear->p1.y; - - l = dx * dx + dy * dy; - - if (l == 0) - return FALSE; - - /* - * compute how much the input of the gradient walked changes - * when moving vertically through the whole image - */ - inc = height * (double) pixman_fixed_1 * pixman_fixed_1 * - (dx * v.vector[0] + dy * v.vector[1]) / - (v.vector[2] * (double) l); - - /* check that casting to integer would result in 0 */ - if (-1 < inc && inc < 1) - return TRUE; - - return FALSE; -} - -static uint32_t * -linear_get_scanline (pixman_iter_t *iter, - const uint32_t *mask, - int Bpp, - pixman_gradient_walker_write_t write_pixel, - pixman_gradient_walker_fill_t fill_pixel) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - pixman_vector_t v, unit; - pixman_fixed_32_32_t l; - pixman_fixed_48_16_t dx, dy; - gradient_t *gradient = (gradient_t *)image; - linear_gradient_t *linear = (linear_gradient_t *)image; - uint32_t *end = buffer + width * (Bpp / 4); - pixman_gradient_walker_t walker; - - _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - unit.vector[0] = image->common.transform->matrix[0][0]; - unit.vector[1] = image->common.transform->matrix[1][0]; - unit.vector[2] = image->common.transform->matrix[2][0]; - } - else - { - unit.vector[0] = pixman_fixed_1; - unit.vector[1] = 0; - unit.vector[2] = 0; - } - - dx = linear->p2.x - linear->p1.x; - dy = linear->p2.y - linear->p1.y; - - l = dx * dx + dy * dy; - - if (l == 0 || unit.vector[2] == 0) - { - /* affine transformation only */ - pixman_fixed_32_32_t t, next_inc; - double inc; - - if (l == 0 || v.vector[2] == 0) - { - t = 0; - inc = 0; - } - else - { - double invden, v2; - - invden = pixman_fixed_1 * (double) pixman_fixed_1 / - (l * (double) v.vector[2]); - v2 = v.vector[2] * (1. / pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; - inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden; - } - next_inc = 0; - - if (((pixman_fixed_32_32_t )(inc * width)) == 0) - { - fill_pixel (&walker, t, buffer, end); - } - else - { - int i; - - i = 0; - while (buffer < end) - { - if (!mask || *mask++) - { - write_pixel (&walker, t + next_inc, buffer); - } - i++; - next_inc = inc * i; - buffer += (Bpp / 4); - } - } - } - else - { - /* projective transformation */ - double t; - - t = 0; - - while (buffer < end) - { - if (!mask || *mask++) - { - if (v.vector[2] != 0) - { - double invden, v2; - - invden = pixman_fixed_1 * (double) pixman_fixed_1 / - (l * (double) v.vector[2]); - v2 = v.vector[2] * (1. / pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; - } - - write_pixel (&walker, t, buffer); - } - - buffer += (Bpp / 4); - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - - iter->y++; - - return iter->buffer; -} - -static uint32_t * -linear_get_scanline_narrow (pixman_iter_t *iter, - const uint32_t *mask) -{ - return linear_get_scanline (iter, mask, 4, - _pixman_gradient_walker_write_narrow, - _pixman_gradient_walker_fill_narrow); -} - - -static uint32_t * -linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - return linear_get_scanline (iter, NULL, 16, - _pixman_gradient_walker_write_wide, - _pixman_gradient_walker_fill_wide); -} - -void -_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (linear_gradient_is_horizontal ( - iter->image, iter->x, iter->y, iter->width, iter->height)) - { - if (iter->iter_flags & ITER_NARROW) - linear_get_scanline_narrow (iter, NULL); - else - linear_get_scanline_wide (iter, NULL); - - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - if (iter->iter_flags & ITER_NARROW) - iter->get_scanline = linear_get_scanline_narrow; - else - iter->get_scanline = linear_get_scanline_wide; - } -} - -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_linear_gradient (const pixman_point_fixed_t * p1, - const pixman_point_fixed_t * p2, - const pixman_gradient_stop_t *stops, - int n_stops) -{ - pixman_image_t *image; - linear_gradient_t *linear; - - image = _pixman_image_allocate (); - - if (!image) - return NULL; - - linear = &image->linear; - - if (!_pixman_init_gradient (&linear->common, stops, n_stops)) - { - free (image); - return NULL; - } - - linear->p1 = *p1; - linear->p2 = *p2; - - image->type = LINEAR; - - return image; -} - diff --git a/vendor/pixman/pixman/pixman-matrix.c b/vendor/pixman/pixman/pixman-matrix.c deleted file mode 100644 index da5209cbe..000000000 --- a/vendor/pixman/pixman/pixman-matrix.c +++ /dev/null @@ -1,1073 +0,0 @@ -/* - * Copyright © 2008 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that copyright - * notice and this permission notice appear in supporting documentation, and - * that the name of the copyright holders not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. The copyright holders make no representations - * about the suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THIS SOFTWARE. - */ - -/* - * Matrix interfaces - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -#define F(x) pixman_int_to_fixed (x) - -static force_inline int -count_leading_zeros (uint32_t x) -{ -#ifdef HAVE_BUILTIN_CLZ - return __builtin_clz (x); -#else - int n = 0; - while (x) - { - n++; - x >>= 1; - } - return 32 - n; -#endif -} - -/* - * Large signed/unsigned integer division with rounding for the platforms with - * only 64-bit integer data type supported (no 128-bit data type). - * - * Arguments: - * hi, lo - high and low 64-bit parts of the dividend - * div - 48-bit divisor - * - * Returns: lowest 64 bits of the result as a return value and highest 64 - * bits of the result to "result_hi" pointer - */ - -/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ -static force_inline uint64_t -rounded_udiv_128_by_48 (uint64_t hi, - uint64_t lo, - uint64_t div, - uint64_t *result_hi) -{ - uint64_t tmp, remainder, result_lo; - assert(div < ((uint64_t)1 << 48)); - - remainder = hi % div; - *result_hi = hi / div; - - tmp = (remainder << 16) + (lo >> 48); - result_lo = tmp / div; - remainder = tmp % div; - - tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); - result_lo = (result_lo << 16) + (tmp / div); - remainder = tmp % div; - - tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); - result_lo = (result_lo << 16) + (tmp / div); - remainder = tmp % div; - - tmp = (remainder << 16) + (lo & 0xFFFF); - result_lo = (result_lo << 16) + (tmp / div); - remainder = tmp % div; - - /* round to nearest */ - if (remainder * 2 >= div && ++result_lo == 0) - *result_hi += 1; - - return result_lo; -} - -/* signed division (128-bit by 49-bit) with rounding to nearest */ -static inline int64_t -rounded_sdiv_128_by_49 (int64_t hi, - uint64_t lo, - int64_t div, - int64_t *signed_result_hi) -{ - uint64_t result_lo, result_hi; - int sign = 0; - if (div < 0) - { - div = -div; - sign ^= 1; - } - if (hi < 0) - { - if (lo != 0) - hi++; - hi = -hi; - lo = -lo; - sign ^= 1; - } - result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); - if (sign) - { - if (result_lo != 0) - result_hi++; - result_hi = -result_hi; - result_lo = -result_lo; - } - if (signed_result_hi) - { - *signed_result_hi = result_hi; - } - return result_lo; -} - -/* - * Multiply 64.16 fixed point value by (2^scalebits) and convert - * to 128-bit integer. - */ -static force_inline void -fixed_64_16_to_int128 (int64_t hi, - int64_t lo, - int64_t *rhi, - int64_t *rlo, - int scalebits) -{ - /* separate integer and fractional parts */ - hi += lo >> 16; - lo &= 0xFFFF; - - if (scalebits <= 0) - { - *rlo = hi >> (-scalebits); - *rhi = *rlo >> 63; - } - else - { - *rhi = hi >> (64 - scalebits); - *rlo = (uint64_t)hi << scalebits; - if (scalebits < 16) - *rlo += lo >> (16 - scalebits); - else - *rlo += lo << (scalebits - 16); - } -} - -/* - * Convert 112.16 fixed point value to 48.16 with clamping for the out - * of range values. - */ -static force_inline pixman_fixed_48_16_t -fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) -{ - if ((lo >> 63) != hi) - { - *clampflag = TRUE; - return hi >= 0 ? INT64_MAX : INT64_MIN; - } - else - { - return lo; - } -} - -/* - * Transform a point with 31.16 fixed point coordinates from the destination - * space to a point with 48.16 fixed point coordinates in the source space. - * No overflows are possible for affine transformations and the results are - * accurate including the least significant bit. Projective transformations - * may overflow, in this case the results are just clamped to return maximum - * or minimum 48.16 values (so that the caller can at least handle the NONE - * and PAD repeats correctly) and the return value is FALSE to indicate that - * such clamping has happened. - */ -PIXMAN_EXPORT pixman_bool_t -pixman_transform_point_31_16 (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result) -{ - pixman_bool_t clampflag = FALSE; - int i; - int64_t tmp[3][2], divint; - uint16_t divfrac; - - /* input vector values must have no more than 31 bits (including sign) - * in the integer part */ - assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - - for (i = 0; i < 3; i++) - { - tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); - tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); - tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); - tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); - tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); - tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); - } - - /* - * separate 64-bit integer and 16-bit fractional parts for the divisor, - * which is also scaled by 65536 after fixed point multiplication. - */ - divint = tmp[2][0] + (tmp[2][1] >> 16); - divfrac = tmp[2][1] & 0xFFFF; - - if (divint == pixman_fixed_1 && divfrac == 0) - { - /* - * this is a simple affine transformation - */ - result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); - result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); - result->v[2] = pixman_fixed_1; - } - else if (divint == 0 && divfrac == 0) - { - /* - * handle zero divisor (if the values are non-zero, set the - * results to maximum positive or minimum negative) - */ - clampflag = TRUE; - - result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); - result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); - - if (result->v[0] > 0) - result->v[0] = INT64_MAX; - else if (result->v[0] < 0) - result->v[0] = INT64_MIN; - - if (result->v[1] > 0) - result->v[1] = INT64_MAX; - else if (result->v[1] < 0) - result->v[1] = INT64_MIN; - } - else - { - /* - * projective transformation, analyze the top 32 bits of the divisor - */ - int32_t hi32divbits = divint >> 32; - if (hi32divbits < 0) - hi32divbits = ~hi32divbits; - - if (hi32divbits == 0) - { - /* the divisor is small, we can actually keep all the bits */ - int64_t hi, rhi, lo, rlo; - int64_t div = ((uint64_t)divint << 16) + divfrac; - - fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); - rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); - result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); - - fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); - rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); - result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); - } - else - { - /* the divisor needs to be reduced to 48 bits */ - int64_t hi, rhi, lo, rlo, div; - int shift = 32 - count_leading_zeros (hi32divbits); - fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); - - fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); - rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); - result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); - - fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); - rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); - result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); - } - } - result->v[2] = pixman_fixed_1; - return !clampflag; -} - -PIXMAN_EXPORT void -pixman_transform_point_31_16_affine (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result) -{ - int64_t hi0, lo0, hi1, lo1; - - /* input vector values must have no more than 31 bits (including sign) - * in the integer part */ - assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - - hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); - lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); - hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); - lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); - hi0 += (int64_t)t->matrix[0][2]; - - hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); - lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); - hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); - lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); - hi1 += (int64_t)t->matrix[1][2]; - - result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); - result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); - result->v[2] = pixman_fixed_1; -} - -PIXMAN_EXPORT void -pixman_transform_point_31_16_3d (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result) -{ - int i; - int64_t tmp[3][2]; - - /* input vector values must have no more than 31 bits (including sign) - * in the integer part */ - assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); - assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); - - for (i = 0; i < 3; i++) - { - tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); - tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); - tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); - tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); - tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); - tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); - } - - result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); - result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); - result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); -} - -PIXMAN_EXPORT void -pixman_transform_init_identity (struct pixman_transform *matrix) -{ - int i; - - memset (matrix, '\0', sizeof (struct pixman_transform)); - for (i = 0; i < 3; i++) - matrix->matrix[i][i] = F (1); -} - -typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_point_3d (const struct pixman_transform *transform, - struct pixman_vector * vector) -{ - pixman_vector_48_16_t tmp; - tmp.v[0] = vector->vector[0]; - tmp.v[1] = vector->vector[1]; - tmp.v[2] = vector->vector[2]; - - pixman_transform_point_31_16_3d (transform, &tmp, &tmp); - - vector->vector[0] = tmp.v[0]; - vector->vector[1] = tmp.v[1]; - vector->vector[2] = tmp.v[2]; - - return vector->vector[0] == tmp.v[0] && - vector->vector[1] == tmp.v[1] && - vector->vector[2] == tmp.v[2]; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_point (const struct pixman_transform *transform, - struct pixman_vector * vector) -{ - pixman_vector_48_16_t tmp; - tmp.v[0] = vector->vector[0]; - tmp.v[1] = vector->vector[1]; - tmp.v[2] = vector->vector[2]; - - if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) - return FALSE; - - vector->vector[0] = tmp.v[0]; - vector->vector[1] = tmp.v[1]; - vector->vector[2] = tmp.v[2]; - - return vector->vector[0] == tmp.v[0] && - vector->vector[1] == tmp.v[1] && - vector->vector[2] == tmp.v[2]; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_multiply (struct pixman_transform * dst, - const struct pixman_transform *l, - const struct pixman_transform *r) -{ - struct pixman_transform d; - int dx, dy; - int o; - - for (dy = 0; dy < 3; dy++) - { - for (dx = 0; dx < 3; dx++) - { - pixman_fixed_48_16_t v; - pixman_fixed_32_32_t partial; - - v = 0; - for (o = 0; o < 3; o++) - { - partial = - (pixman_fixed_32_32_t) l->matrix[dy][o] * - (pixman_fixed_32_32_t) r->matrix[o][dx]; - - v += (partial + 0x8000) >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - d.matrix[dy][dx] = (pixman_fixed_t) v; - } - } - - *dst = d; - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_scale (struct pixman_transform *t, - pixman_fixed_t sx, - pixman_fixed_t sy) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = sx; - t->matrix[1][1] = sy; - t->matrix[2][2] = F (1); -} - -static pixman_fixed_t -fixed_inverse (pixman_fixed_t x) -{ - return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_scale (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t sx, - pixman_fixed_t sy) -{ - struct pixman_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) - { - pixman_transform_init_scale (&t, sx, sy); - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_scale (&t, fixed_inverse (sx), - fixed_inverse (sy)); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_rotate (struct pixman_transform *t, - pixman_fixed_t c, - pixman_fixed_t s) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = c; - t->matrix[0][1] = -s; - t->matrix[1][0] = s; - t->matrix[1][1] = c; - t->matrix[2][2] = F (1); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_rotate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t c, - pixman_fixed_t s) -{ - struct pixman_transform t; - - if (forward) - { - pixman_transform_init_rotate (&t, c, s); - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_rotate (&t, c, -s); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_translate (struct pixman_transform *t, - pixman_fixed_t tx, - pixman_fixed_t ty) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = F (1); - t->matrix[0][2] = tx; - t->matrix[1][1] = F (1); - t->matrix[1][2] = ty; - t->matrix[2][2] = F (1); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_translate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t tx, - pixman_fixed_t ty) -{ - struct pixman_transform t; - - if (forward) - { - pixman_transform_init_translate (&t, tx, ty); - - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_translate (&t, -tx, -ty); - - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_bounds (const struct pixman_transform *matrix, - struct pixman_box16 * b) - -{ - struct pixman_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].vector[0] = F (b->x1); - v[0].vector[1] = F (b->y1); - v[0].vector[2] = F (1); - - v[1].vector[0] = F (b->x2); - v[1].vector[1] = F (b->y1); - v[1].vector[2] = F (1); - - v[2].vector[0] = F (b->x2); - v[2].vector[1] = F (b->y2); - v[2].vector[2] = F (1); - - v[3].vector[0] = F (b->x1); - v[3].vector[1] = F (b->y2); - v[3].vector[2] = F (1); - - for (i = 0; i < 4; i++) - { - if (!pixman_transform_point (matrix, &v[i])) - return FALSE; - - x1 = pixman_fixed_to_int (v[i].vector[0]); - y1 = pixman_fixed_to_int (v[i].vector[1]); - x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); - y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1])); - - if (i == 0) - { - b->x1 = x1; - b->y1 = y1; - b->x2 = x2; - b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_invert (struct pixman_transform * dst, - const struct pixman_transform *src) -{ - struct pixman_f_transform m; - - pixman_f_transform_from_pixman_transform (&m, src); - - if (!pixman_f_transform_invert (&m, &m)) - return FALSE; - - if (!pixman_transform_from_pixman_f_transform (dst, &m)) - return FALSE; - - return TRUE; -} - -static pixman_bool_t -within_epsilon (pixman_fixed_t a, - pixman_fixed_t b, - pixman_fixed_t epsilon) -{ - pixman_fixed_t t = a - b; - - if (t < 0) - t = -t; - - return t <= epsilon; -} - -#define EPSILON (pixman_fixed_t) (2) - -#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) -#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) -#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) -#define IS_UNIT(a) \ - (within_epsilon (a, F (1), EPSILON) || \ - within_epsilon (a, F (-1), EPSILON) || \ - IS_ZERO (a)) -#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_identity (const struct pixman_transform *t) -{ - return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && - IS_SAME (t->matrix[0][0], t->matrix[2][2]) && - !IS_ZERO (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_ZERO (t->matrix[0][2]) && - IS_ZERO (t->matrix[1][0]) && - IS_ZERO (t->matrix[1][2]) && - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_scale (const struct pixman_transform *t) -{ - return (!IS_ZERO (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_ZERO (t->matrix[0][2]) && - - IS_ZERO (t->matrix[1][0]) && - !IS_ZERO (t->matrix[1][1]) && - IS_ZERO (t->matrix[1][2]) && - - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1]) && - !IS_ZERO (t->matrix[2][2])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_int_translate (const struct pixman_transform *t) -{ - return (IS_ONE (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_INT (t->matrix[0][2]) && - - IS_ZERO (t->matrix[1][0]) && - IS_ONE (t->matrix[1][1]) && - IS_INT (t->matrix[1][2]) && - - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1]) && - IS_ONE (t->matrix[2][2])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_inverse (const struct pixman_transform *a, - const struct pixman_transform *b) -{ - struct pixman_transform t; - - if (!pixman_transform_multiply (&t, a, b)) - return FALSE; - - return pixman_transform_is_identity (&t); -} - -PIXMAN_EXPORT void -pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, - const struct pixman_transform *t) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); - } -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_from_pixman_f_transform (struct pixman_transform * t, - const struct pixman_f_transform *ft) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - { - double d = ft->m[j][i]; - if (d < -32767.0 || d > 32767.0) - return FALSE; - d = d * 65536.0 + 0.5; - t->matrix[j][i] = (pixman_fixed_t) floor (d); - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_invert (struct pixman_f_transform * dst, - const struct pixman_f_transform *src) -{ - static const int a[3] = { 2, 2, 1 }; - static const int b[3] = { 1, 0, 0 }; - pixman_f_transform_t d; - double det; - int i, j; - - det = 0; - for (i = 0; i < 3; i++) - { - double p; - int ai = a[i]; - int bi = b[i]; - p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - - src->m[ai][1] * src->m[bi][2]); - if (i == 1) - p = -p; - det += p; - } - - if (det == 0) - return FALSE; - - det = 1 / det; - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - { - double p; - int ai = a[i]; - int aj = a[j]; - int bi = b[i]; - int bj = b[j]; - - p = (src->m[ai][aj] * src->m[bi][bj] - - src->m[ai][bj] * src->m[bi][aj]); - - if (((i + j) & 1) != 0) - p = -p; - - d.m[j][i] = det * p; - } - } - - *dst = d; - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_point (const struct pixman_f_transform *t, - struct pixman_f_vector * v) -{ - struct pixman_f_vector result; - int i, j; - double a; - - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - - if (!result.v[2]) - return FALSE; - - for (j = 0; j < 2; j++) - v->v[j] = result.v[j] / result.v[2]; - - v->v[2] = 1; - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_point_3d (const struct pixman_f_transform *t, - struct pixman_f_vector * v) -{ - struct pixman_f_vector result; - int i, j; - double a; - - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - - *v = result; -} - -PIXMAN_EXPORT void -pixman_f_transform_multiply (struct pixman_f_transform * dst, - const struct pixman_f_transform *l, - const struct pixman_f_transform *r) -{ - struct pixman_f_transform d; - int dx, dy; - int o; - - for (dy = 0; dy < 3; dy++) - { - for (dx = 0; dx < 3; dx++) - { - double v = 0; - for (o = 0; o < 3; o++) - v += l->m[dy][o] * r->m[o][dx]; - d.m[dy][dx] = v; - } - } - - *dst = d; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_scale (struct pixman_f_transform *t, - double sx, - double sy) -{ - t->m[0][0] = sx; - t->m[0][1] = 0; - t->m[0][2] = 0; - t->m[1][0] = 0; - t->m[1][1] = sy; - t->m[1][2] = 0; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_scale (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double sx, - double sy) -{ - struct pixman_f_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) - { - pixman_f_transform_init_scale (&t, sx, sy); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { - pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_rotate (struct pixman_f_transform *t, - double c, - double s) -{ - t->m[0][0] = c; - t->m[0][1] = -s; - t->m[0][2] = 0; - t->m[1][0] = s; - t->m[1][1] = c; - t->m[1][2] = 0; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_rotate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double c, - double s) -{ - struct pixman_f_transform t; - - if (forward) - { - pixman_f_transform_init_rotate (&t, c, s); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { - pixman_f_transform_init_rotate (&t, c, -s); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_translate (struct pixman_f_transform *t, - double tx, - double ty) -{ - t->m[0][0] = 1; - t->m[0][1] = 0; - t->m[0][2] = tx; - t->m[1][0] = 0; - t->m[1][1] = 1; - t->m[1][2] = ty; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_translate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double tx, - double ty) -{ - struct pixman_f_transform t; - - if (forward) - { - pixman_f_transform_init_translate (&t, tx, ty); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { - pixman_f_transform_init_translate (&t, -tx, -ty); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_bounds (const struct pixman_f_transform *t, - struct pixman_box16 * b) -{ - struct pixman_f_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].v[0] = b->x1; - v[0].v[1] = b->y1; - v[0].v[2] = 1; - v[1].v[0] = b->x2; - v[1].v[1] = b->y1; - v[1].v[2] = 1; - v[2].v[0] = b->x2; - v[2].v[1] = b->y2; - v[2].v[2] = 1; - v[3].v[0] = b->x1; - v[3].v[1] = b->y2; - v[3].v[2] = 1; - - for (i = 0; i < 4; i++) - { - if (!pixman_f_transform_point (t, &v[i])) - return FALSE; - - x1 = floor (v[i].v[0]); - y1 = floor (v[i].v[1]); - x2 = ceil (v[i].v[0]); - y2 = ceil (v[i].v[1]); - - if (i == 0) - { - b->x1 = x1; - b->y1 = y1; - b->x2 = x2; - b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_identity (struct pixman_f_transform *t) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - t->m[j][i] = i == j ? 1 : 0; - } -} diff --git a/vendor/pixman/pixman/pixman-mips-dspr2-asm.S b/vendor/pixman/pixman/pixman-mips-dspr2-asm.S deleted file mode 100644 index 9dad163b7..000000000 --- a/vendor/pixman/pixman/pixman-mips-dspr2-asm.S +++ /dev/null @@ -1,4283 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) - */ - -#include "pixman-private.h" -#include "pixman-mips-dspr2-asm.h" - -LEAF_MIPS_DSPR2(pixman_fill_buff16_mips) -/* - * a0 - *dest - * a1 - count (bytes) - * a2 - value to fill buffer with - */ - - beqz a1, 3f - andi t1, a0, 0x0002 - beqz t1, 0f /* check if address is 4-byte aligned */ - nop - sh a2, 0(a0) - addiu a0, a0, 2 - addiu a1, a1, -2 -0: - srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ - replv.ph a2, a2 /* replicate fill value (16bit) in a2 */ - beqz t1, 2f - nop -1: - addiu t1, t1, -1 - beqz t1, 11f - addiu a1, a1, -32 - pref 30, 32(a0) - sw a2, 0(a0) - sw a2, 4(a0) - sw a2, 8(a0) - sw a2, 12(a0) - sw a2, 16(a0) - sw a2, 20(a0) - sw a2, 24(a0) - sw a2, 28(a0) - b 1b - addiu a0, a0, 32 -11: - sw a2, 0(a0) - sw a2, 4(a0) - sw a2, 8(a0) - sw a2, 12(a0) - sw a2, 16(a0) - sw a2, 20(a0) - sw a2, 24(a0) - sw a2, 28(a0) - addiu a0, a0, 32 -2: - blez a1, 3f - addiu a1, a1, -2 - sh a2, 0(a0) - b 2b - addiu a0, a0, 2 -3: - jr ra - nop - -END(pixman_fill_buff16_mips) - -LEAF_MIPS32R2(pixman_fill_buff32_mips) -/* - * a0 - *dest - * a1 - count (bytes) - * a2 - value to fill buffer with - */ - - beqz a1, 3f - nop - srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ - beqz t1, 2f - nop -1: - addiu t1, t1, -1 - beqz t1, 11f - addiu a1, a1, -32 - pref 30, 32(a0) - sw a2, 0(a0) - sw a2, 4(a0) - sw a2, 8(a0) - sw a2, 12(a0) - sw a2, 16(a0) - sw a2, 20(a0) - sw a2, 24(a0) - sw a2, 28(a0) - b 1b - addiu a0, a0, 32 -11: - sw a2, 0(a0) - sw a2, 4(a0) - sw a2, 8(a0) - sw a2, 12(a0) - sw a2, 16(a0) - sw a2, 20(a0) - sw a2, 24(a0) - sw a2, 28(a0) - addiu a0, a0, 32 -2: - blez a1, 3f - addiu a1, a1, -4 - sw a2, 0(a0) - b 2b - addiu a0, a0, 4 -3: - jr ra - nop - -END(pixman_fill_buff32_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop - li t4, 0xf800f800 - li t5, 0x07e007e0 - li t6, 0x001f001f -1: - lw t0, 0(a1) - lw t1, 4(a1) - addiu a1, a1, 8 - addiu a2, a2, -2 - - CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8 - - sh t2, 0(a0) - sh t3, 2(a0) - - addiu t2, a2, -1 - bgtz t2, 1b - addiu a0, a0, 4 -2: - beqz a2, 3f - nop - lw t0, 0(a1) - - CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 - - sh t1, 0(a0) -3: - j ra - nop - -END(pixman_composite_src_8888_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (r5g6b5) - * a2 - w - */ - - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop - li t4, 0x07e007e0 - li t5, 0x001F001F -1: - lhu t0, 0(a1) - lhu t1, 2(a1) - addiu a1, a1, 4 - addiu a2, a2, -2 - - CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 - - sw t2, 0(a0) - sw t3, 4(a0) - - addiu t2, a2, -1 - bgtz t2, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - lhu t0, 0(a1) - - CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 - - sw t1, 0(a0) -3: - j ra - nop - -END(pixman_composite_src_0565_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (x8r8g8b8) - * a2 - w - */ - - beqz a2, 4f - nop - li t9, 0xff000000 - srl t8, a2, 3 /* t1 = how many multiples of 8 src pixels */ - beqz t8, 3f /* branch if less than 8 src pixels */ - nop -1: - addiu t8, t8, -1 - beqz t8, 2f - addiu a2, a2, -8 - pref 0, 32(a1) - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - addiu a1, a1, 32 - or t0, t0, t9 - or t1, t1, t9 - or t2, t2, t9 - or t3, t3, t9 - or t4, t4, t9 - or t5, t5, t9 - or t6, t6, t9 - or t7, t7, t9 - pref 30, 32(a0) - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - b 1b - addiu a0, a0, 32 -2: - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - addiu a1, a1, 32 - or t0, t0, t9 - or t1, t1, t9 - or t2, t2, t9 - or t3, t3, t9 - or t4, t4, t9 - or t5, t5, t9 - or t6, t6, t9 - or t7, t7, t9 - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - beqz a2, 4f - addiu a0, a0, 32 -3: - lw t0, 0(a1) - addiu a1, a1, 4 - addiu a2, a2, -1 - or t1, t0, t9 - sw t1, 0(a0) - bnez a2, 3b - addiu a0, a0, 4 -4: - jr ra - nop - -END(pixman_composite_src_x888_8888_asm_mips) - -#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) -LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (b8g8r8) - * a2 - w - */ - - beqz a2, 6f - nop - - lui t8, 0xff00; - srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ - beqz t9, 4f /* branch if less than 4 src pixels */ - nop - - li t0, 0x1 - li t1, 0x2 - li t2, 0x3 - andi t3, a1, 0x3 - beq t3, t0, 1f - nop - beq t3, t1, 2f - nop - beq t3, t2, 3f - nop - -0: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ - lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ - lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ - wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ - wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ - - packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ - packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ - rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ - or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ - srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ - or t4, t4, t8 /* t4 = FF | R1 | G1 | B1 */ - packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ - rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ - or t5, t5, t8 /* t5 = FF | R3 | G3 | B3 */ - rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ - or t2, t2, t8 /* t5 = FF | R3 | G3 | B3 */ - - sw t4, 0(a0) - sw t3, 4(a0) - sw t5, 8(a0) - sw t2, 12(a0) - b 0b - addiu a0, a0, 16 - -1: - lbu t6, 0(a1) /* t6 = 0 | 0 | 0 | R1 */ - lhu t7, 1(a1) /* t7 = 0 | 0 | B1 | G1 */ - sll t6, t6, 16 /* t6 = 0 | R1 | 0 | 0 */ - wsbh t7, t7 /* t7 = 0 | 0 | G1 | B1 */ - or t7, t6, t7 /* t7 = 0 | R1 | G1 | B1 */ -11: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ - lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ - lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ - wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ - wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ - - packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ - packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ - rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ - rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ - rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ - or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ - or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ - or t3, t3, t8 /* t1 = FF | R3 | G3 | B3 */ - or t4, t4, t8 /* t3 = FF | R4 | G4 | B4 */ - - sw t7, 0(a0) - sw t0, 4(a0) - sw t3, 8(a0) - sw t4, 12(a0) - rotr t7, t2, 16 /* t7 = xx | R5 | G5 | B5 */ - b 11b - addiu a0, a0, 16 - -2: - lhu t7, 0(a1) /* t7 = 0 | 0 | G1 | R1 */ - wsbh t7, t7 /* t7 = 0 | 0 | R1 | G1 */ -21: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ - lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ - lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ - wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ - wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ - - precr_sra.ph.w t7, t0, 0 /* t7 = R1 | G1 | B1 | R2 */ - rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ - packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ - rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ - srl t7, t7, 8 /* t7 = 0 | R1 | G1 | B1 */ - rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ - or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ - or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ - or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ - or t3, t3, t8 /* t3 = FF | R4 | G4 | B4 */ - - sw t7, 0(a0) - sw t0, 4(a0) - sw t1, 8(a0) - sw t3, 12(a0) - srl t7, t2, 16 /* t7 = 0 | 0 | R5 | G5 */ - b 21b - addiu a0, a0, 16 - -3: - lbu t7, 0(a1) /* t7 = 0 | 0 | 0 | R1 */ -31: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ - lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ - lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ - wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ - wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ - - precr_sra.ph.w t7, t0, 0 /* t7 = xx | R1 | G1 | B1 */ - packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ - rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ - rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ - rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ - or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ - or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ - or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ - or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */ - - sw t7, 0(a0) - sw t3, 4(a0) - sw t1, 8(a0) - sw t4, 12(a0) - srl t7, t2, 16 /* t7 = 0 | 0 | xx | R5 */ - b 31b - addiu a0, a0, 16 - -4: - beqz a2, 6f - nop -5: - lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ - lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ - lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ - addiu a1, a1, 3 - - sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ - sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ - - or t2, t2, t1 /* t2 = 0 | 0 | G | B */ - or t2, t2, t0 /* t2 = 0 | R | G | B */ - or t2, t2, t8 /* t2 = FF | R | G | B */ - - sw t2, 0(a0) - addiu a2, a2, -1 - bnez a2, 5b - addiu a0, a0, 4 -6: - j ra - nop - -END(pixman_composite_src_0888_8888_rev_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (b8g8r8) - * a2 - w - */ - - SAVE_REGS_ON_STACK 0, v0, v1 - beqz a2, 6f - nop - - li t6, 0xf800f800 - li t7, 0x07e007e0 - li t8, 0x001F001F - srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ - beqz t9, 4f /* branch if less than 4 src pixels */ - nop - - li t0, 0x1 - li t1, 0x2 - li t2, 0x3 - andi t3, a1, 0x3 - beq t3, t0, 1f - nop - beq t3, t1, 2f - nop - beq t3, t2, 3f - nop - -0: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ - lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ - lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ - wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ - wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ - - packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ - packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ - rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ - srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ - packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ - rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ - rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ - - CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1 - CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1 - - sh t4, 0(a0) - sh t3, 2(a0) - sh t5, 4(a0) - sh t2, 6(a0) - b 0b - addiu a0, a0, 8 - -1: - lbu t4, 0(a1) /* t4 = 0 | 0 | 0 | R1 */ - lhu t5, 1(a1) /* t5 = 0 | 0 | B1 | G1 */ - sll t4, t4, 16 /* t4 = 0 | R1 | 0 | 0 */ - wsbh t5, t5 /* t5 = 0 | 0 | G1 | B1 */ - or t5, t4, t5 /* t5 = 0 | R1 | G1 | B1 */ -11: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ - lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ - lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ - wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ - wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ - - packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ - packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ - rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ - rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ - rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ - - CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 - CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1 - - sh t5, 0(a0) - sh t0, 2(a0) - sh t3, 4(a0) - sh t4, 6(a0) - rotr t5, t2, 16 /* t5 = xx | R5 | G5 | B5 */ - b 11b - addiu a0, a0, 8 - -2: - lhu t5, 0(a1) /* t5 = 0 | 0 | G1 | R1 */ - wsbh t5, t5 /* t5 = 0 | 0 | R1 | G1 */ -21: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ - lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ - lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ - wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ - wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ - - precr_sra.ph.w t5, t0, 0 /* t5 = R1 | G1 | B1 | R2 */ - rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ - packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ - rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ - srl t5, t5, 8 /* t5 = 0 | R1 | G1 | B1 */ - rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ - - CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 - CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1 - - sh t5, 0(a0) - sh t0, 2(a0) - sh t1, 4(a0) - sh t3, 6(a0) - srl t5, t2, 16 /* t5 = 0 | 0 | R5 | G5 */ - b 21b - addiu a0, a0, 8 - -3: - lbu t5, 0(a1) /* t5 = 0 | 0 | 0 | R1 */ -31: - beqz t9, 4f - addiu t9, t9, -1 - lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ - lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ - lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ - - addiu a1, a1, 12 - addiu a2, a2, -4 - - wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ - wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ - wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ - - precr_sra.ph.w t5, t0, 0 /* t5 = xx | R1 | G1 | B1 */ - packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ - rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ - rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ - rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ - - CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1 - CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1 - - sh t5, 0(a0) - sh t3, 2(a0) - sh t1, 4(a0) - sh t4, 6(a0) - srl t5, t2, 16 /* t5 = 0 | 0 | xx | R5 */ - b 31b - addiu a0, a0, 8 - -4: - beqz a2, 6f - nop -5: - lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ - lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ - lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ - addiu a1, a1, 3 - - sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ - sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ - - or t2, t2, t1 /* t2 = 0 | 0 | G | B */ - or t2, t2, t0 /* t2 = 0 | R | G | B */ - - CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 - - sh t3, 0(a0) - addiu a2, a2, -1 - bnez a2, 5b - addiu a0, a0, 2 -6: - RESTORE_REGS_FROM_STACK 0, v0, v1 - j ra - nop - -END(pixman_composite_src_0888_0565_rev_asm_mips) -#endif - -LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) -/* - * a0 - dst (a8b8g8r8) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - SAVE_REGS_ON_STACK 0, v0 - li v0, 0x00ff00ff - - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) - lw t1, 4(a1) - addiu a1, a1, 8 - addiu a2, a2, -2 - srl t2, t0, 24 - srl t3, t1, 24 - - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 - - sll t0, t0, 8 - sll t1, t1, 8 - andi t2, t2, 0xff - andi t3, t3, 0xff - or t0, t0, t2 - or t1, t1, t3 - wsbh t0, t0 - wsbh t1, t1 - rotr t0, t0, 16 - rotr t1, t1, 16 - sw t0, 0(a0) - sw t1, 4(a0) - - addiu t2, a2, -1 - bgtz t2, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - lw t0, 0(a1) - srl t1, t0, 24 - - MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 - - sll t0, t0, 8 - andi t1, t1, 0xff - or t0, t0, t1 - wsbh t0, t0 - rotr t0, t0, 16 - sw t0, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_composite_src_pixbuf_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - SAVE_REGS_ON_STACK 0, v0 - li v0, 0x00ff00ff - - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) - lw t1, 4(a1) - addiu a1, a1, 8 - addiu a2, a2, -2 - srl t2, t0, 24 - srl t3, t1, 24 - - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 - - sll t0, t0, 8 - sll t1, t1, 8 - andi t2, t2, 0xff - andi t3, t3, 0xff - or t0, t0, t2 - or t1, t1, t3 - rotr t0, t0, 8 - rotr t1, t1, 8 - sw t0, 0(a0) - sw t1, 4(a0) - - addiu t2, a2, -1 - bgtz t2, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - lw t0, 0(a1) - srl t1, t0, 24 - - MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 - - sll t0, t0, 8 - andi t1, t1, 0xff - or t0, t0, t1 - rotr t0, t0, 8 - sw t0, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_composite_src_rpixbuf_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - - SAVE_REGS_ON_STACK 0, v0 - li v0, 0x00ff00ff - - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop - -1: - /* a1 = source (32bit constant) */ - lbu t0, 0(a2) /* t2 = mask (a8) */ - lbu t1, 1(a2) /* t3 = mask (a8) */ - addiu a2, a2, 2 - - MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9 - - sw t2, 0(a0) - sw t3, 4(a0) - addiu a3, a3, -2 - addiu t2, a3, -1 - bgtz t2, 1b - addiu a0, a0, 8 - - beqz a3, 3f - nop - -2: - lbu t0, 0(a2) - addiu a2, a2, 1 - - MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5 - - sw t1, 0(a0) - addiu a3, a3, -1 - addiu a0, a0, 4 - -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_composite_src_n_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - li t9, 0x00ff00ff - beqz a3, 3f - nop - srl t7, a3, 2 /* t7 = how many multiples of 4 dst pixels */ - beqz t7, 1f /* branch if less than 4 src pixels */ - nop - - srl t8, a1, 24 - replv.ph t8, t8 - -0: - beqz t7, 1f - addiu t7, t7, -1 - lbu t0, 0(a2) - lbu t1, 1(a2) - lbu t2, 2(a2) - lbu t3, 3(a2) - - addiu a2, a2, 4 - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr.qb.ph t0, t3, t1 - - muleu_s.ph.qbl t2, t0, t8 - muleu_s.ph.qbr t3, t0, t8 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t2, t2, t3 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a3, a3, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a3, 3f - nop - srl t8, a1, 24 -2: - lbu t0, 0(a2) - addiu a2, a2, 1 - - mul t2, t0, t8 - shra_r.ph t3, t2, 8 - andi t3, t3, 0x00ff - addq.ph t2, t2, t3 - shra_r.ph t2, t2, 8 - - sb t2, 0(a0) - addiu a3, a3, -1 - bnez a3, 2b - addiu a0, a0, 1 - -3: - j ra - nop - -END(pixman_composite_src_n_8_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - mask (a8r8g8b8) - * a3 - w - */ - - beqz a3, 8f - nop - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - - li t6, 0xff - addiu t7, zero, -1 /* t7 = 0xffffffff */ - srl t8, a1, 24 /* t8 = srca */ - li t9, 0x00ff00ff - - addiu t1, a3, -1 - beqz t1, 4f /* last pixel */ - nop - -0: - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - addiu a3, a3, -2 /* w = w - 2 */ - or t2, t0, t1 - beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - nop - -//if(ma) - lw t2, 0(a0) /* t2 = dst */ - lw t3, 4(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t2, t4, t2 - addu_s.qb t3, t5, t3 - sw t2, 0(a0) - sw t3, 4(a0) - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 8 - b 4f - nop -1: -//if (t0 == 0xffffffff) && (t1 == 0xffffffff): - beq t8, t6, 2f /* if (srca == 0xff) */ - nop - lw t2, 0(a0) /* t2 = dst */ - lw t3, 4(a0) /* t3 = dst */ - not t0, a1 - not t1, a1 - srl t0, t0, 24 - srl t1, t1, 24 - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t2, a1, t2 - addu_s.qb t3, a1, t3 - sw t2, 0(a0) - sw t3, 4(a0) - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 8 - b 4f - nop -2: - sw a1, 0(a0) - sw a1, 4(a0) -3: - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 8 - -4: - beqz a3, 7f - nop - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - beqz t0, 7f /* if (t0 == 0) */ - nop - beq t0, t7, 5f /* if (t0 == 0xffffffff) */ - nop -//if(ma) - lw t1, 0(a0) /* t1 = dst */ - MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 - MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 - not t0, t0 - MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0 - addu_s.qb t1, t2, t1 - sw t1, 0(a0) - RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 - j ra - nop -5: -//if (t0 == 0xffffffff) - beq t8, t6, 6f /* if (srca == 0xff) */ - nop - lw t1, 0(a0) /* t1 = dst */ - not t0, a1 - srl t0, t0, 24 - MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 - addu_s.qb t1, a1, t1 - sw t1, 0(a0) - RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 - j ra - nop -6: - sw a1, 0(a0) -7: - RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 -8: - j ra - nop - -END(pixman_composite_over_n_8888_8888_ca_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (32bit constant) - * a2 - mask (a8r8g8b8) - * a3 - w - */ - - beqz a3, 8f - nop - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - li t6, 0xff - addiu t7, zero, -1 /* t7 = 0xffffffff */ - srl t8, a1, 24 /* t8 = srca */ - li t9, 0x00ff00ff - li s6, 0xf800f800 - li s7, 0x07e007e0 - li s8, 0x001F001F - - addiu t1, a3, -1 - beqz t1, 4f /* last pixel */ - nop - -0: - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - addiu a3, a3, -2 /* w = w - 2 */ - or t2, t0, t1 - beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - nop - -//if(ma) - lhu t2, 0(a0) /* t2 = dst */ - lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t2, t4, t2 - addu_s.qb t3, t5, t3 - CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 - sh t2, 0(a0) - sh t3, 2(a0) - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 4 - b 4f - nop -1: -//if (t0 == 0xffffffff) && (t1 == 0xffffffff): - beq t8, t6, 2f /* if (srca == 0xff) */ - nop - lhu t2, 0(a0) /* t2 = dst */ - lhu t3, 2(a0) /* t3 = dst */ - not t0, a1 - not t1, a1 - srl t0, t0, 24 - srl t1, t1, 24 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t2, a1, t2 - addu_s.qb t3, a1, t3 - CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 - sh t2, 0(a0) - sh t3, 2(a0) - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 4 - b 4f - nop -2: - CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 - sh t2, 0(a0) - sh t2, 2(a0) -3: - addiu t1, a3, -1 - bgtz t1, 0b - addiu a0, a0, 4 - -4: - beqz a3, 7f - nop - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - beqz t0, 7f /* if (t0 == 0) */ - nop - beq t0, t7, 5f /* if (t0 == 0xffffffff) */ - nop -//if(ma) - lhu t1, 0(a0) /* t1 = dst */ - MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 - MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 - not t0, t0 - CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 - MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 - addu_s.qb s1, t2, s1 - CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 - sh t1, 0(a0) - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - j ra - nop -5: -//if (t0 == 0xffffffff) - beq t8, t6, 6f /* if (srca == 0xff) */ - nop - lhu t1, 0(a0) /* t1 = dst */ - not t0, a1 - srl t0, t0, 24 - CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 - MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4 - addu_s.qb s1, a1, s1 - CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 - sh t1, 0(a0) - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - j ra - nop -6: - CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 - sh t1, 0(a0) -7: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 -8: - j ra - nop - -END(pixman_composite_over_n_8888_0565_ca_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, v0 - li t9, 0x00ff00ff - beqz a3, 3f - nop - srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ - beqz v0, 1f /* branch if less than 4 src pixels */ - nop - - srl t8, a1, 24 - replv.ph t8, t8 - -0: - beqz v0, 1f - addiu v0, v0, -1 - lbu t0, 0(a2) - lbu t1, 1(a2) - lbu t2, 2(a2) - lbu t3, 3(a2) - lbu t4, 0(a0) - lbu t5, 1(a0) - lbu t6, 2(a0) - lbu t7, 3(a0) - - addiu a2, a2, 4 - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr_sra.ph.w t5, t4, 0 - precr_sra.ph.w t7, t6, 0 - - precr.qb.ph t0, t3, t1 - precr.qb.ph t1, t7, t5 - - muleu_s.ph.qbl t2, t0, t8 - muleu_s.ph.qbr t3, t0, t8 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t0, t2, t3 - not t6, t0 - - preceu.ph.qbl t7, t6 - preceu.ph.qbr t6, t6 - - muleu_s.ph.qbl t2, t1, t7 - muleu_s.ph.qbr t3, t1, t6 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t1, t2, t3 - - addu_s.qb t2, t0, t1 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a3, a3, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a3, 3f - nop - srl t8, a1, 24 -2: - lbu t0, 0(a2) - lbu t1, 0(a0) - addiu a2, a2, 1 - - mul t2, t0, t8 - shra_r.ph t3, t2, 8 - andi t3, t3, 0x00ff - addq.ph t2, t2, t3 - shra_r.ph t2, t2, 8 - not t3, t2 - andi t3, t3, 0x00ff - - - mul t4, t1, t3 - shra_r.ph t5, t4, 8 - andi t5, t5, 0x00ff - addq.ph t4, t4, t5 - shra_r.ph t4, t4, 8 - andi t4, t4, 0x00ff - - addu_s.qb t2, t2, t4 - sb t2, 0(a0) - addiu a3, a3, -1 - bnez a3, 2b - addiu a0, a0, 1 - -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_composite_over_n_8_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4 - beqz a3, 4f - nop - li t4, 0x00ff00ff - li t5, 0xff - addiu t0, a3, -1 - beqz t0, 3f /* last pixel */ - srl t6, a1, 24 /* t6 = srca */ - not s4, a1 - beq t5, t6, 2f /* if (srca == 0xff) */ - srl s4, s4, 24 -1: - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - lbu t1, 1(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 2 - and t3, t0, t1 - - lw t2, 0(a0) /* t2 = dst */ - beq t3, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ - lw t3, 4(a0) /* t3 = dst */ - - MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3 - not s2, s0 - not s3, s1 - srl s2, s2, 24 - srl s3, s3, 24 - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9 - addu_s.qb s2, t2, s0 - addu_s.qb s3, t3, s1 - sw s2, 0(a0) - b 111f - sw s3, 4(a0) -11: - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9 - addu_s.qb s2, t2, a1 - addu_s.qb s3, t3, a1 - sw s2, 0(a0) - sw s3, 4(a0) - -111: - addiu a3, a3, -2 - addiu t0, a3, -1 - bgtz t0, 1b - addiu a0, a0, 8 - b 3f - nop -2: - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - lbu t1, 1(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 2 - and t3, t0, t1 - beq t3, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */ - nop - lw t2, 0(a0) /* t2 = dst */ - lw t3, 4(a0) /* t3 = dst */ - - OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \ - t6, t7, t4, t8, t9, s0, s1, s2, s3 - sw t6, 0(a0) - b 222f - sw t7, 4(a0) -22: - sw a1, 0(a0) - sw a1, 4(a0) -222: - addiu a3, a3, -2 - addiu t0, a3, -1 - bgtz t0, 2b - addiu a0, a0, 8 -3: - blez a3, 4f - nop - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - beqz t0, 4f /* if (t0 == 0) */ - addiu a2, a2, 1 - move t3, a1 - beq t0, t5, 31f /* if (t0 == 0xff) */ - lw t1, 0(a0) /* t1 = dst */ - - MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8 -31: - not t2, t3 - srl t2, t2, 24 - MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8 - addu_s.qb t2, t1, t3 - sw t2, 0(a0) -4: - RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4 - j ra - nop - -END(pixman_composite_over_n_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 - beqz a3, 4f - nop - li t4, 0x00ff00ff - li t5, 0xff - li t6, 0xf800f800 - li t7, 0x07e007e0 - li t8, 0x001F001F - addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ - srl t0, a1, 24 /* t0 = srca */ - not v0, a1 - beq t0, t5, 2f /* if (srca == 0xff) */ - srl v0, v0, 24 -1: - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - lbu t1, 1(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 2 - lhu t2, 0(a0) /* t2 = dst */ - lhu t3, 2(a0) /* t3 = dst */ - CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4 - and t9, t0, t1 - beq t9, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ - nop - - MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8 - not s4, s2 - not s5, s3 - srl s4, s4, 24 - srl s5, s5, 24 - MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8 - addu_s.qb s4, s2, s0 - addu_s.qb s5, s3, s1 - CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 - sh t2, 0(a0) - b 111f - sh t3, 2(a0) -11: - MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8 - addu_s.qb s4, a1, s0 - addu_s.qb s5, a1, s1 - CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 - sh t2, 0(a0) - sh t3, 2(a0) -111: - addiu a3, a3, -2 - addiu t0, a3, -1 - bgtz t0, 1b - addiu a0, a0, 4 - b 3f - nop -2: - CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2 -21: - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - lbu t1, 1(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ - addiu a2, a2, 2 - and t9, t0, t1 - move s2, s0 - beq t9, t5, 22f /* if (t0 == 0xff) && (t2 == 0xff) */ - move s3, s0 - lhu t2, 0(a0) /* t2 = dst */ - lhu t3, 2(a0) /* t3 = dst */ - - CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7 - OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, s2, s3, \ - t2, t3, t4, t9, s4, s5, s6, s7, s8 - CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5 -22: - sh s2, 0(a0) - sh s3, 2(a0) -222: - addiu a3, a3, -2 - addiu t0, a3, -1 - bgtz t0, 21b - addiu a0, a0, 4 -3: - blez a3, 4f - nop - /* a1 = src */ - lbu t0, 0(a2) /* t0 = mask */ - beqz t0, 4f /* if (t0 == 0) */ - nop - lhu t1, 0(a0) /* t1 = dst */ - CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 - beq t0, t5, 31f /* if (t0 == 0xff) */ - move t3, a1 - - MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9 -31: - not t6, t3 - srl t6, t6, 24 - MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9 - addu_s.qb t1, t2, t3 - CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7 - sh t2, 0(a0) -4: - RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 - j ra - nop - -END(pixman_composite_over_n_8_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (32bit constant) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - srl a2, a2, 24 - beqz t1, 2f - nop - -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - - OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \ - t5, t6, t4, t7, t8, t9, t0, t1, s0 - - sw t5, 0(a0) - sw t6, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - - OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0 - j ra - nop - -END(pixman_composite_over_8888_n_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - mask (32bit constant) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 - li t6, 0x00ff00ff - li t7, 0xf800f800 - li t8, 0x07e007e0 - li t9, 0x001F001F - beqz a3, 3f - nop - srl a2, a2, 24 - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - lhu t3, 2(a0) /* t2 = destination (r5g6b5) */ - addiu a1, a1, 8 - - CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3 - OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t4, t5, \ - t2, t3, t6, t0, t1, s0, s1, s2, s3 - CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1 - - sh t4, 0(a0) - sh t5, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5 - OVER_8888_8_8888 t0, a2, t2, t1, t6, t3, t4, t5, t7 - CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 - j ra - nop - -END(pixman_composite_over_8888_n_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (r5g6b5) - * a2 - mask (32bit constant) - * a3 - w - */ - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 - li t6, 0x00ff00ff - li t7, 0xf800f800 - li t8, 0x07e007e0 - li t9, 0x001F001F - beqz a3, 3f - nop - srl a2, a2, 24 - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - lhu t1, 2(a1) /* t1 = source (r5g6b5) */ - /* a2 = mask (32bit constant) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ - addiu a1, a1, 4 - - CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3 - CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5 - OVER_2x8888_2x8_2x8888 t4, t5, a2, a2, s0, s1, \ - t0, t1, t6, s2, s3, s4, s5, t4, t5 - CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - /* a2 = mask (32bit constant) */ - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5 - CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5 - OVER_8888_8_8888 t2, a2, t3, t0, t6, t1, t4, t5, t7 - CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 - j ra - nop - -END(pixman_composite_over_0565_n_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ - lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - addiu a2, a2, 2 - - OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \ - t7, t8, t4, t9, s0, s1, t0, t1, t2 - - sw t7, 0(a0) - sw t8, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - - OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1 - j ra - nop - -END(pixman_composite_over_8888_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 - li t6, 0x00ff00ff - li t7, 0xf800f800 - li t8, 0x07e007e0 - li t9, 0x001F001F - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lhu t4, 0(a0) /* t4 = destination (r5g6b5) */ - lhu t5, 2(a0) /* t5 = destination (r5g6b5) */ - addiu a1, a1, 8 - addiu a2, a2, 2 - - CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 - OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, s0, s1, \ - t4, t5, t6, s2, s3, s4, s5, t0, t1 - CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 - OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8 - CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 - j ra - nop - -END(pixman_composite_over_8888_8_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (r5g6b5) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 - li t4, 0xf800f800 - li t5, 0x07e007e0 - li t6, 0x001F001F - li t7, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - lhu t1, 2(a1) /* t1 = source (r5g6b5) */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ - lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ - addiu a1, a1, 4 - addiu a2, a2, 2 - - CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 - CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 - OVER_2x8888_2x8_2x8888 s0, s1, t2, t3, s2, s3, \ - t0, t1, t7, s4, s5, t8, t9, s0, s1 - CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 - CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 - OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8 - CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5 - j ra - nop - -END(pixman_composite_over_0565_8_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (a8r8g8b8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ - lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ - lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ - lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - addiu a2, a2, 8 - srl t2, t2, 24 - srl t3, t3, 24 - - OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1 - - sw t7, 0(a0) - sw t8, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - srl t1, t1, 24 - - OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_over_8888_8888_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - - not t5, t0 - srl t5, t5, 24 - not t6, t1 - srl t6, t6, 24 - - or t7, t5, t6 - beqz t7, 11f - or t8, t0, t1 - beqz t8, 12f - - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3 - - addu_s.qb t0, t7, t0 - addu_s.qb t1, t8, t1 -11: - sw t0, 0(a0) - sw t1, 4(a0) -12: - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - addiu a1, a1, 4 - - not t2, t0 - srl t2, t2, 24 - - beqz t2, 21f - nop - beqz t0, 3f - - MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7 - - addu_s.qb t0, t3, t0 -21: - sw t0, 0(a0) - -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_over_8888_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - li t4, 0x00ff00ff - li s3, 0xf800f800 - li s4, 0x07e007e0 - li s5, 0x001F001F - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ - addiu a1, a1, 8 - - not t5, t0 - srl t5, t5, 24 - not t6, t1 - srl t6, t6, 24 - - or t7, t5, t6 - beqz t7, 11f - or t8, t0, t1 - beqz t8, 12f - - CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2 - MIPS_2xUN8x4_MUL_2xUN8 s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1 - - addu_s.qb t0, t7, t0 - addu_s.qb t1, t8, t1 -11: - CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3 - sh t7, 0(a0) - sh t8, 2(a0) -12: - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a2, 3f - nop - - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - addiu a1, a1, 4 - - not t2, t0 - srl t2, t2, 24 - - beqz t2, 21f - nop - beqz t0, 3f - - CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9 - MIPS_UN8x4_MUL_UN8 s0, t2, t3, t4, t5, t6, t7 - - addu_s.qb t0, t3, t0 -21: - CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9 - sh s0, 0(a0) - -3: - RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 - j ra - nop - -END(pixman_composite_over_8888_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (32bit constant) - * a2 - w - */ - - beqz a2, 5f - nop - - not t0, a1 - srl t0, t0, 24 - bgtz t0, 1f - nop - CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 -0: - sh t1, 0(a0) - addiu a2, a2, -1 - bgtz a2, 0b - addiu a0, a0, 2 - j ra - nop - -1: - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - addiu t1, a2, -1 - beqz t1, 3f - nop -2: - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - lhu t2, 2(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 - MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 - addu_s.qb t1, t1, a1 - addu_s.qb t2, t2, a1 - CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 - - sh t3, 0(a0) - sh t8, 2(a0) - - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 2b - addiu a0, a0, 4 -3: - beqz a2, 4f - nop - - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 - MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 - addu_s.qb t1, t1, a1 - CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 - - sh t2, 0(a0) - -4: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 -5: - j ra - nop - -END(pixman_composite_over_n_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - w - */ - - beqz a2, 5f - nop - - not t0, a1 - srl t0, t0, 24 - bgtz t0, 1f - nop -0: - sw a1, 0(a0) - addiu a2, a2, -1 - bgtz a2, 0b - addiu a0, a0, 4 - j ra - nop - -1: - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - addiu t1, a2, -1 - beqz t1, 3f - nop -2: - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 - - addu_s.qb t7, t7, a1 - addu_s.qb t8, t8, a1 - - sw t7, 0(a0) - sw t8, 4(a0) - - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 2b - addiu a0, a0, 8 -3: - beqz a2, 4f - nop - - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - - MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 - - addu_s.qb t3, t3, a1 - - sw t3, 0(a0) - -4: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 -5: - j ra - nop - -END(pixman_composite_over_n_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (a8) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, v0, v1 - li t9, 0x00ff00ff - beqz a3, 3f - nop - - srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ - beqz v0, 1f /* branch if less than 4 src pixels */ - nop - -0: - beqz v0, 1f - addiu v0, v0, -1 - lbu t0, 0(a2) - lbu t1, 1(a2) - lbu t2, 2(a2) - lbu t3, 3(a2) - lbu t4, 0(a0) - lbu t5, 1(a0) - lbu t6, 2(a0) - lbu t7, 3(a0) - - addiu a2, a2, 4 - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr_sra.ph.w t5, t4, 0 - precr_sra.ph.w t7, t6, 0 - - precr.qb.ph t0, t3, t1 - precr.qb.ph t1, t7, t5 - - lbu t4, 0(a1) - lbu v1, 1(a1) - lbu t7, 2(a1) - lbu t8, 3(a1) - - addiu a1, a1, 4 - - precr_sra.ph.w v1, t4, 0 - precr_sra.ph.w t8, t7, 0 - - muleu_s.ph.qbl t2, t0, t8 - muleu_s.ph.qbr t3, t0, v1 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t0, t2, t3 - - addu_s.qb t2, t0, t1 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a3, a3, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a3, 3f - nop -2: - lbu t8, 0(a1) - lbu t0, 0(a2) - lbu t1, 0(a0) - addiu a1, a1, 1 - addiu a2, a2, 1 - - mul t2, t0, t8 - shra_r.ph t3, t2, 8 - andi t3, t3, 0xff - addq.ph t2, t2, t3 - shra_r.ph t2, t2, 8 - andi t2, t2, 0xff - - addu_s.qb t2, t2, t1 - sb t2, 0(a0) - addiu a3, a3, -1 - bnez a3, 2b - addiu a0, a0, 1 - -3: - RESTORE_REGS_FROM_STACK 0, v0, v1 - j ra - nop - -END(pixman_composite_add_8_8_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, v0 - li t9, 0x00ff00ff - beqz a3, 3f - nop - - srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ - beqz v0, 1f /* branch if less than 4 src pixels */ - nop - - srl t8, a1, 24 - replv.ph t8, t8 - -0: - beqz v0, 1f - addiu v0, v0, -1 - lbu t0, 0(a2) - lbu t1, 1(a2) - lbu t2, 2(a2) - lbu t3, 3(a2) - lbu t4, 0(a0) - lbu t5, 1(a0) - lbu t6, 2(a0) - lbu t7, 3(a0) - - addiu a2, a2, 4 - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr_sra.ph.w t5, t4, 0 - precr_sra.ph.w t7, t6, 0 - - precr.qb.ph t0, t3, t1 - precr.qb.ph t1, t7, t5 - - muleu_s.ph.qbl t2, t0, t8 - muleu_s.ph.qbr t3, t0, t8 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t0, t2, t3 - - addu_s.qb t2, t0, t1 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a3, a3, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a3, 3f - nop - srl t8, a1, 24 -2: - lbu t0, 0(a2) - lbu t1, 0(a0) - addiu a2, a2, 1 - - mul t2, t0, t8 - shra_r.ph t3, t2, 8 - andi t3, t3, 0xff - addq.ph t2, t2, t3 - shra_r.ph t2, t2, 8 - andi t2, t2, 0xff - - addu_s.qb t2, t2, t1 - sb t2, 0(a0) - addiu a3, a3, -1 - bnez a3, 2b - addiu a0, a0, 1 - -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_composite_add_n_8_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - /* a1 = source (32bit constant) */ - lbu t0, 0(a2) /* t0 = mask (a8) */ - lbu t1, 1(a2) /* t1 = mask (a8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - addiu a2, a2, 2 - - MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \ - t0, t1, \ - t2, t3, \ - t5, t6, \ - t4, t7, t8, t9, s0, s1, s2 - - sw t5, 0(a0) - sw t6, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - /* a1 = source (32bit constant) */ - lbu t0, 0(a2) /* t0 = mask (a8) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6 - - sw t2, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_add_n_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (r5g6b5) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - li t4, 0xf800f800 - li t5, 0x07e007e0 - li t6, 0x001F001F - li t7, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - lhu t1, 2(a1) /* t1 = source (r5g6b5) */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ - lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ - addiu a1, a1, 4 - addiu a2, a2, 2 - - CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 - CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, s6, s7 - MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s0, s1, \ - t2, t3, \ - s2, s3, \ - t0, t1, \ - t7, s4, s5, s6, s7, t8, t9 - CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - lhu t0, 0(a1) /* t0 = source (r5g6b5) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 - CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t3, t1, t4, t0, t7, t2, t5, t6 - CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - j ra - nop - -END(pixman_composite_add_0565_8_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (a8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ - lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - addiu a2, a2, 2 - - MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ - t2, t3, \ - t5, t6, \ - t7, t8, \ - t4, t9, s0, s1, s2, t0, t1 - - sw t7, 0(a0) - sw t8, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_add_8888_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (32bit constant) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a3, 3f - nop - srl a2, a2, 24 - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - - MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ - a2, a2, \ - t2, t3, \ - t5, t6, \ - t4, t7, t8, t9, s0, s1, s2 - - sw t5, 0(a0) - sw t6, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - /* a2 = mask (32bit constant) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_add_8888_n_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - mask (a8r8g8b8) - * a3 - w - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2 - li t4, 0x00ff00ff - beqz a3, 3f - nop - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ - lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ - lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ - lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ - lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ - addiu a1, a1, 8 - addiu a2, a2, 8 - srl t2, t2, 24 - srl t3, t3, 24 - - MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ - t2, t3, \ - t5, t6, \ - t7, t8, \ - t4, t9, s0, s1, s2, t0, t1 - - sw t7, 0(a0) - sw t8, 4(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a3, 3f - nop - lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ - lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - srl t1, t1, 24 - - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 - - sw t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2 - j ra - nop - -END(pixman_composite_add_8888_8888_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (a8) - * a2 - w - */ - - beqz a2, 3f - nop - srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */ - beqz t9, 1f /* branch if less than 4 src pixels */ - nop - -0: - beqz t9, 1f - addiu t9, t9, -1 - lbu t0, 0(a1) - lbu t1, 1(a1) - lbu t2, 2(a1) - lbu t3, 3(a1) - lbu t4, 0(a0) - lbu t5, 1(a0) - lbu t6, 2(a0) - lbu t7, 3(a0) - - addiu a1, a1, 4 - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr_sra.ph.w t5, t4, 0 - precr_sra.ph.w t7, t6, 0 - - precr.qb.ph t0, t3, t1 - precr.qb.ph t1, t7, t5 - - addu_s.qb t2, t0, t1 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a2, a2, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a2, 3f - nop -2: - lbu t0, 0(a1) - lbu t1, 0(a0) - addiu a1, a1, 1 - - addu_s.qb t2, t0, t1 - sb t2, 0(a0) - addiu a2, a2, -1 - bnez a2, 2b - addiu a0, a0, 1 - -3: - j ra - nop - -END(pixman_composite_add_8_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - w - */ - - beqz a2, 4f - nop - - srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */ - beqz t9, 3f /* branch if less than 4 src pixels */ - nop -1: - addiu t9, t9, -1 - beqz t9, 2f - addiu a2, a2, -4 - - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 0(a0) - lw t5, 4(a0) - lw t6, 8(a0) - lw t7, 12(a0) - addiu a1, a1, 16 - - addu_s.qb t4, t4, t0 - addu_s.qb t5, t5, t1 - addu_s.qb t6, t6, t2 - addu_s.qb t7, t7, t3 - - sw t4, 0(a0) - sw t5, 4(a0) - sw t6, 8(a0) - sw t7, 12(a0) - b 1b - addiu a0, a0, 16 -2: - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 0(a0) - lw t5, 4(a0) - lw t6, 8(a0) - lw t7, 12(a0) - addiu a1, a1, 16 - - addu_s.qb t4, t4, t0 - addu_s.qb t5, t5, t1 - addu_s.qb t6, t6, t2 - addu_s.qb t7, t7, t3 - - sw t4, 0(a0) - sw t5, 4(a0) - sw t6, 8(a0) - sw t7, 12(a0) - - beqz a2, 4f - addiu a0, a0, 16 -3: - lw t0, 0(a1) - lw t1, 0(a0) - addiu a1, a1, 4 - addiu a2, a2, -1 - addu_s.qb t1, t1, t0 - sw t1, 0(a0) - bnez a2, 3b - addiu a0, a0, 4 -4: - jr ra - nop - -END(pixman_composite_add_8888_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8) - * a2 - w - */ - - beqz a2, 4f - nop - - SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 - li t2, 0xf800f800 - li t3, 0x07e007e0 - li t4, 0x001F001F - li t5, 0x00ff00ff - - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lbu t0, 0(a1) /* t0 = source (a8) */ - lbu t1, 1(a1) /* t1 = source (a8) */ - lhu t6, 0(a0) /* t6 = destination (r5g6b5) */ - lhu t7, 2(a0) /* t7 = destination (r5g6b5) */ - addiu a1, a1, 2 - - not t0, t0 - not t1, t1 - andi t0, 0xff /* t0 = neg source1 */ - andi t1, 0xff /* t1 = neg source2 */ - CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3 - MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9 - CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1 - - sh t8, 0(a0) - sh t9, 2(a0) - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a2, 3f - nop - lbu t0, 0(a1) /* t0 = source (a8) */ - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - - not t0, t0 - andi t0, 0xff /* t0 = neg source */ - CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 - MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6 - CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4 - - sh t2, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 -4: - j ra - nop - -END(pixman_composite_out_reverse_8_0565_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8) - * a2 - w - */ - - beqz a2, 3f - nop - li t4, 0x00ff00ff - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - lbu t0, 0(a1) /* t0 = source (a8) */ - lbu t1, 1(a1) /* t1 = source (a8) */ - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - addiu a1, a1, 2 - not t0, t0 - not t1, t1 - andi t0, 0xff /* t0 = neg source */ - andi t1, 0xff /* t1 = neg source */ - - MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 - - sw t5, 0(a0) - sw t6, 4(a0) - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - lbu t0, 0(a1) /* t0 = source (a8) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - not t0, t0 - andi t0, 0xff /* t0 = neg source */ - - MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6 - - sw t2, 0(a0) -3: - j ra - nop - -END(pixman_composite_out_reverse_8_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (32bit constant) - * a2 - w - */ - - beqz a2, 5f - nop - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - li t0, 0x00ff00ff - srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ - beqz t9, 2f /* branch if less than 4 src pixels */ - nop -1: - beqz t9, 2f - addiu t9, t9, -1 - - lw t1, 0(a0) - lw t2, 4(a0) - lw t3, 8(a0) - lw t4, 12(a0) - - addiu a2, a2, -4 - - not t5, t1 - not t6, t2 - not t7, t3 - not t8, t4 - srl t5, t5, 24 - srl t6, t6, 24 - srl t7, t7, 24 - srl t8, t8, 24 - replv.ph t5, t5 - replv.ph t6, t6 - replv.ph t7, t7 - replv.ph t8, t8 - muleu_s.ph.qbl s0, a1, t5 - muleu_s.ph.qbr s1, a1, t5 - muleu_s.ph.qbl s2, a1, t6 - muleu_s.ph.qbr s3, a1, t6 - muleu_s.ph.qbl s4, a1, t7 - muleu_s.ph.qbr s5, a1, t7 - muleu_s.ph.qbl s6, a1, t8 - muleu_s.ph.qbr s7, a1, t8 - - shra_r.ph t5, s0, 8 - shra_r.ph t6, s1, 8 - shra_r.ph t7, s2, 8 - shra_r.ph t8, s3, 8 - and t5, t5, t0 - and t6, t6, t0 - and t7, t7, t0 - and t8, t8, t0 - addq.ph s0, s0, t5 - addq.ph s1, s1, t6 - addq.ph s2, s2, t7 - addq.ph s3, s3, t8 - shra_r.ph s0, s0, 8 - shra_r.ph s1, s1, 8 - shra_r.ph s2, s2, 8 - shra_r.ph s3, s3, 8 - shra_r.ph t5, s4, 8 - shra_r.ph t6, s5, 8 - shra_r.ph t7, s6, 8 - shra_r.ph t8, s7, 8 - and t5, t5, t0 - and t6, t6, t0 - and t7, t7, t0 - and t8, t8, t0 - addq.ph s4, s4, t5 - addq.ph s5, s5, t6 - addq.ph s6, s6, t7 - addq.ph s7, s7, t8 - shra_r.ph s4, s4, 8 - shra_r.ph s5, s5, 8 - shra_r.ph s6, s6, 8 - shra_r.ph s7, s7, 8 - - precr.qb.ph t5, s0, s1 - precr.qb.ph t6, s2, s3 - precr.qb.ph t7, s4, s5 - precr.qb.ph t8, s6, s7 - addu_s.qb t5, t1, t5 - addu_s.qb t6, t2, t6 - addu_s.qb t7, t3, t7 - addu_s.qb t8, t4, t8 - - sw t5, 0(a0) - sw t6, 4(a0) - sw t7, 8(a0) - sw t8, 12(a0) - b 1b - addiu a0, a0, 16 - -2: - beqz a2, 4f - nop -3: - lw t1, 0(a0) - - not t2, t1 - srl t2, t2, 24 - replv.ph t2, t2 - - muleu_s.ph.qbl t4, a1, t2 - muleu_s.ph.qbr t5, a1, t2 - shra_r.ph t6, t4, 8 - shra_r.ph t7, t5, 8 - - and t6,t6,t0 - and t7,t7,t0 - - addq.ph t8, t4, t6 - addq.ph t9, t5, t7 - - shra_r.ph t8, t8, 8 - shra_r.ph t9, t9, 8 - - precr.qb.ph t9, t8, t9 - - addu_s.qb t9, t1, t9 - sw t9, 0(a0) - - addiu a2, a2, -1 - bnez a2, 3b - addiu a0, a0, 4 -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 -5: - j ra - nop - -END(pixman_composite_over_reverse_n_8888_asm_mips) - -LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) -/* - * a0 - dst (a8) - * a1 - src (32bit constant) - * a2 - w - */ - - li t9, 0x00ff00ff - beqz a2, 3f - nop - srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ - beqz t7, 1f /* branch if less than 4 src pixels */ - nop - - srl t8, a1, 24 - replv.ph t8, t8 - -0: - beqz t7, 1f - addiu t7, t7, -1 - lbu t0, 0(a0) - lbu t1, 1(a0) - lbu t2, 2(a0) - lbu t3, 3(a0) - - precr_sra.ph.w t1, t0, 0 - precr_sra.ph.w t3, t2, 0 - precr.qb.ph t0, t3, t1 - - muleu_s.ph.qbl t2, t0, t8 - muleu_s.ph.qbr t3, t0, t8 - shra_r.ph t4, t2, 8 - shra_r.ph t5, t3, 8 - and t4, t4, t9 - and t5, t5, t9 - addq.ph t2, t2, t4 - addq.ph t3, t3, t5 - shra_r.ph t2, t2, 8 - shra_r.ph t3, t3, 8 - precr.qb.ph t2, t2, t3 - - sb t2, 0(a0) - srl t2, t2, 8 - sb t2, 1(a0) - srl t2, t2, 8 - sb t2, 2(a0) - srl t2, t2, 8 - sb t2, 3(a0) - addiu a2, a2, -4 - b 0b - addiu a0, a0, 4 - -1: - beqz a2, 3f - nop - srl t8, a1, 24 -2: - lbu t0, 0(a0) - - mul t2, t0, t8 - shra_r.ph t3, t2, 8 - andi t3, t3, 0x00ff - addq.ph t2, t2, t3 - shra_r.ph t2, t2, 8 - - sb t2, 0(a0) - addiu a2, a2, -1 - bnez a2, 2b - addiu a0, a0, 1 - -3: - j ra - nop - -END(pixman_composite_in_n_8_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (a8r8g8b8) - * a2 - w - * a3 - vx - * 16(sp) - unit_x - */ - - SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 - lw t8, 16(sp) /* t8 = unit_x */ - li t6, 0x00ff00ff - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - - sra t1, a3, 16 /* t0 = vx >> 16 */ - sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t1, a1, t1 - lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - - lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ - lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ - - OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 - - sw t4, 0(a0) - sw t5, 4(a0) - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - - OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 - - sw t2, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 - j ra - nop - -END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - w - * a3 - vx - * 16(sp) - unit_x - */ - - SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 - lw t8, 40(sp) /* t8 = unit_x */ - li t4, 0x00ff00ff - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - beqz a2, 3f - nop - addiu t1, a2, -1 - beqz t1, 2f - nop -1: - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - sra t1, a3, 16 /* t0 = vx >> 16 */ - sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t1, a1, t1 - lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ - - CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 - OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 - CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 - - sh v0, 0(a0) - sh v1, 2(a0) - addiu a2, a2, -2 - addiu t1, a2, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a2, 3f - nop - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ - addu a3, a3, t8 /* a3 = vx + unit_x */ - - CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 - OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 - CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 - - sh t2, 0(a0) -3: - RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 - j ra - nop - -END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - src (r5g6b5) - * a2 - w - * a3 - vx - * 16(sp) - unit_x - */ - - SAVE_REGS_ON_STACK 0, v0 - beqz a2, 3f - nop - - lw v0, 16(sp) /* v0 = unit_x */ - addiu t1, a2, -1 - beqz t1, 2f - nop - - li t4, 0x07e007e0 - li t5, 0x001F001F -1: - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ - addu t0, a1, t0 - lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ - addu a3, a3, v0 /* a3 = vx + unit_x */ - sra t1, a3, 16 /* t1 = vx >> 16 */ - sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ - addu t1, a1, t1 - lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ - addu a3, a3, v0 /* a3 = vx + unit_x */ - addiu a2, a2, -2 - - CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 - - sw t2, 0(a0) - sw t3, 4(a0) - - addiu t2, a2, -1 - bgtz t2, 1b - addiu a0, a0, 8 -2: - beqz a2, 3f - nop - sra t0, a3, 16 /* t0 = vx >> 16 */ - sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ - addu t0, a1, t0 - lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ - - CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 - - sw t1, 0(a0) -3: - RESTORE_REGS_FROM_STACK 0, v0 - j ra - nop - -END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (a8r8g8b8) - * a2 - mask (a8) - * a3 - w - * 16(sp) - vx - * 20(sp) - unit_x - */ - beqz a3, 4f - nop - - SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 - lw v0, 36(sp) /* v0 = vx */ - lw v1, 40(sp) /* v1 = unit_x */ - li t6, 0x00ff00ff - li t7, 0xf800f800 - li t8, 0x07e007e0 - li t9, 0x001F001F - - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - sra t0, v0, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - addu v0, v0, v1 /* v0 = vx + unit_x */ - sra t1, v0, 16 /* t1 = vx >> 16 */ - sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */ - addu t1, a1, t1 - lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ - addu v0, v0, v1 /* v0 = vx + unit_x */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lhu t4, 0(a0) /* t4 = destination (r5g6b5) */ - lhu t5, 2(a0) /* t5 = destination (r5g6b5) */ - addiu a2, a2, 2 - - CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 - OVER_2x8888_2x8_2x8888 t0, t1, \ - t2, t3, \ - s0, s1, \ - t4, t5, \ - t6, s2, s3, s4, s5, t2, t3 - CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - sra t0, v0, 16 /* t0 = vx >> 16 */ - sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ - addu t0, a1, t0 - lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 - OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8 - CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 -4: - j ra - nop - -END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) -/* - * a0 - dst (r5g6b5) - * a1 - src (r5g6b5) - * a2 - mask (a8) - * a3 - w - * 16(sp) - vx - * 20(sp) - unit_x - */ - - beqz a3, 4f - nop - SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 - lw v0, 36(sp) /* v0 = vx */ - lw v1, 40(sp) /* v1 = unit_x */ - li t4, 0xf800f800 - li t5, 0x07e007e0 - li t6, 0x001F001F - li t7, 0x00ff00ff - - addiu t1, a3, -1 - beqz t1, 2f - nop -1: - sra t0, v0, 16 /* t0 = vx >> 16 */ - sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ - addu t0, a1, t0 - lhu t0, 0(t0) /* t0 = source (r5g6b5) */ - addu v0, v0, v1 /* v0 = vx + unit_x */ - sra t1, v0, 16 /* t1 = vx >> 16 */ - sll t1, t1, 1 /* t1 = t1 * 2 (r5g6b5) */ - addu t1, a1, t1 - lhu t1, 0(t1) /* t1 = source (r5g6b5) */ - addu v0, v0, v1 /* v0 = vx + unit_x */ - lbu t2, 0(a2) /* t2 = mask (a8) */ - lbu t3, 1(a2) /* t3 = mask (a8) */ - lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ - lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ - addiu a2, a2, 2 - - CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 - CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 - OVER_2x8888_2x8_2x8888 s0, s1, \ - t2, t3, \ - s2, s3, \ - t0, t1, \ - t7, t8, t9, s4, s5, s0, s1 - CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 - - sh s0, 0(a0) - sh s1, 2(a0) - addiu a3, a3, -2 - addiu t1, a3, -1 - bgtz t1, 1b - addiu a0, a0, 4 -2: - beqz a3, 3f - nop - sra t0, v0, 16 /* t0 = vx >> 16 */ - sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ - addu t0, a1, t0 - - lhu t0, 0(t0) /* t0 = source (r5g6b5) */ - lbu t1, 0(a2) /* t1 = mask (a8) */ - lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ - - CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 - CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 - OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8 - CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 - - sh t3, 0(a0) -3: - RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 -4: - j ra - nop - -END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 - - lw s0, 36(sp) /* s0 = wt */ - lw s1, 40(sp) /* s1 = wb */ - lw s2, 44(sp) /* s2 = vx */ - lw s3, 48(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a1) /* t0 = tl */ - lwx t1, t8(a1) /* t1 = tr */ - addiu a3, a3, -1 - lwx t2, t9(a2) /* t2 = bl */ - lwx t3, t8(a2) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez a3, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 - - lw s0, 36(sp) /* s0 = wt */ - lw s1, 40(sp) /* s1 = wb */ - lw s2, 44(sp) /* s2 = vx */ - lw s3, 48(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a1) /* t0 = tl */ - lwx t1, t8(a1) /* t1 = tr */ - addiu a3, a3, -1 - lwx t2, t9(a2) /* t2 = bl */ - lwx t3, t8(a2) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 - - addu s2, s2, s3 /* vx += unit_x; */ - sh t1, 0(a0) - bnez a3, 0b - addiu a0, a0, 2 - - RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li v1, 0x07e007e0 - li s8, 0x001f001f - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 1 - addiu t8, t9, 2 - lhx t0, t9(a1) /* t0 = tl */ - lhx t1, t8(a1) /* t1 = tr */ - andi t1, t1, 0xffff - addiu a3, a3, -1 - lhx t2, t9(a2) /* t2 = bl */ - lhx t3, t8(a2) /* t3 = br */ - andi t3, t3, 0xffff - - CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez a3, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li v1, 0x07e007e0 - li s8, 0x001f001f - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 1 - addiu t8, t9, 2 - lhx t0, t9(a1) /* t0 = tl */ - lhx t1, t8(a1) /* t1 = tr */ - andi t1, t1, 0xffff - addiu a3, a3, -1 - lhx t2, t9(a2) /* t2 = bl */ - lhx t3, t8(a2) /* t3 = br */ - andi t3, t3, 0xffff - - CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 - - addu s2, s2, s3 /* vx += unit_x; */ - sh t1, 0(a0) - bnez a3, 0b - addiu a0, a0, 2 - - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 40(sp) /* s0 = wt */ - lw s1, 44(sp) /* s1 = wb */ - lw s2, 48(sp) /* s2 = vx */ - lw s3, 52(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li s8, 0x00ff00ff - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a1) /* t0 = tl */ - lwx t1, t8(a1) /* t1 = tr */ - addiu a3, a3, -1 - lwx t2, t9(a2) /* t2 = bl */ - lwx t3, t8(a2) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lw t1, 0(a0) /* t1 = dest */ - OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t2, 0(a0) - bnez a3, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) -/* - * a0 - *dst - * a1 - *src_top - * a2 - *src_bottom - * a3 - w - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - */ - - beqz a3, 1f - nop - - SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 - - lw s0, 36(sp) /* s0 = wt */ - lw s1, 40(sp) /* s1 = wb */ - lw s2, 44(sp) /* s2 = vx */ - lw s3, 48(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a1) /* t0 = tl */ - lwx t1, t8(a1) /* t1 = tr */ - addiu a3, a3, -1 - lwx t2, t9(a2) /* t2 = bl */ - lwx t3, t8(a2) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lw t1, 0(a0) - addu_s.qb t2, t0, t1 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t2, 0(a0) - bnez a3, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *mask - * a2 - *src_top - * a3 - *src_bottom - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - lw v1, 32(sp) - beqz v1, 1f - nop - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li s8, 0x00ff00ff - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a2) /* t0 = tl */ - lwx t1, t8(a2) /* t1 = tr */ - addiu v1, v1, -1 - lwx t2, t9(a3) /* t2 = bl */ - lwx t3, t8(a3) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - addiu a1, a1, 1 - MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez v1, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *mask - * a2 - *src_top - * a3 - *src_bottom - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - lw v1, 32(sp) - beqz v1, 1f - nop - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li s8, 0x00ff00ff - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a2) /* t0 = tl */ - lwx t1, t8(a2) /* t1 = tr */ - addiu v1, v1, -1 - lwx t2, t9(a3) /* t2 = bl */ - lwx t3, t8(a3) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - addiu a1, a1, 1 - MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 - CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 - - addu s2, s2, s3 /* vx += unit_x; */ - sh t1, 0(a0) - bnez v1, 0b - addiu a0, a0, 2 - - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *mask - * a2 - *src_top - * a3 - *src_bottom - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - lw t0, 32(sp) - beqz t0, 1f - nop - - SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra - - lw s0, 48(sp) /* s0 = wt */ - lw s1, 52(sp) /* s1 = wb */ - lw s2, 56(sp) /* s2 = vx */ - lw s3, 60(sp) /* s3 = unit_x */ - lw ra, 64(sp) /* ra = w */ - li v0, 0x00ff00ff - li v1, 0x07e007e0 - li s8, 0x001f001f - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - li t5, BILINEAR_INTERPOLATION_RANGE - subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 1 - addiu t8, t9, 2 - lhx t0, t9(a2) /* t0 = tl */ - lhx t1, t8(a2) /* t1 = tr */ - andi t1, t1, 0xffff - addiu ra, ra, -1 - lhx t2, t9(a3) /* t2 = bl */ - lhx t3, t8(a3) /* t3 = br */ - andi t3, t3, 0xffff - - CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - addiu a1, a1, 1 - MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez ra, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) -/* - * a0 - *dst - * a1 - *mask - * a2 - *src_top - * a3 - *src_bottom - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - lw t0, 32(sp) - beqz t0, 1f - nop - - SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra - - lw s0, 48(sp) /* s0 = wt */ - lw s1, 52(sp) /* s1 = wb */ - lw s2, 56(sp) /* s2 = vx */ - lw s3, 60(sp) /* s3 = unit_x */ - lw ra, 64(sp) /* ra = w */ - li v0, 0x00ff00ff - li v1, 0x07e007e0 - li s8, 0x001f001f - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - li t5, BILINEAR_INTERPOLATION_RANGE - subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 1 - addiu t8, t9, 2 - lhx t0, t9(a2) /* t0 = tl */ - lhx t1, t8(a2) /* t1 = tr */ - andi t1, t1, 0xffff - addiu ra, ra, -1 - lhx t2, t9(a3) /* t2 = bl */ - lhx t3, t8(a3) /* t3 = br */ - andi t3, t3, 0xffff - - CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 - CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - addiu a1, a1, 1 - MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 - CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 - - addu s2, s2, s3 /* vx += unit_x; */ - sh t1, 0(a0) - bnez ra, 0b - addiu a0, a0, 2 - - RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) -/* - * a0 - dst (a8r8g8b8) - * a1 - mask (a8) - * a2 - src_top (a8r8g8b8) - * a3 - src_bottom (a8r8g8b8) - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw v1, 60(sp) /* v1 = w(sp + 32 + 28 save regs stack offset)*/ - beqz v1, 1f - nop - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li s8, 0x00ff00ff - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a2) /* t0 = tl */ - lwx t1, t8(a2) /* t1 = tr */ - addiu v1, v1, -1 - lwx t2, t9(a3) /* t2 = bl */ - lwx t3, t8(a3) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \ - t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - lw t2, 0(a0) /* t2 = dst */ - addiu a1, a1, 1 - OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez v1, 0b - addiu a0, a0, 4 - -1: - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) - -LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) -/* - * a0 - *dst - * a1 - *mask - * a2 - *src_top - * a3 - *src_bottom - * 16(sp) - wt - * 20(sp) - wb - * 24(sp) - vx - * 28(sp) - unit_x - * 32(sp) - w - */ - - lw v1, 32(sp) - beqz v1, 1f - nop - - SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lw s0, 44(sp) /* s0 = wt */ - lw s1, 48(sp) /* s1 = wb */ - lw s2, 52(sp) /* s2 = vx */ - lw s3, 56(sp) /* s3 = unit_x */ - li v0, BILINEAR_INTERPOLATION_RANGE - li s8, 0x00ff00ff - - sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) - sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) -0: - andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ - subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ - - mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ - mul s5, s0, t4 /* s5 = wt*(vx>>8) */ - mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ - mul s7, s1, t4 /* s7 = wb*(vx>>8) */ - - sra t9, s2, 16 - sll t9, t9, 2 - addiu t8, t9, 4 - lwx t0, t9(a2) /* t0 = tl */ - lwx t1, t8(a2) /* t1 = tr */ - addiu v1, v1, -1 - lwx t2, t9(a3) /* t2 = bl */ - lwx t3, t8(a3) /* t3 = br */ - - BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 - lbu t1, 0(a1) /* t1 = mask */ - lw t2, 0(a0) /* t2 = dst */ - addiu a1, a1, 1 - MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5 - - addu s2, s2, s3 /* vx += unit_x; */ - sw t0, 0(a0) - bnez v1, 0b - addiu a0, a0, 4 - - RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 -1: - j ra - nop - -END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) diff --git a/vendor/pixman/pixman/pixman-mips-dspr2-asm.h b/vendor/pixman/pixman/pixman-mips-dspr2-asm.h deleted file mode 100644 index e23856619..000000000 --- a/vendor/pixman/pixman/pixman-mips-dspr2-asm.h +++ /dev/null @@ -1,711 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) - */ - -#ifndef PIXMAN_MIPS_DSPR2_ASM_H -#define PIXMAN_MIPS_DSPR2_ASM_H - -#define zero $0 -#define AT $1 -#define v0 $2 -#define v1 $3 -#define a0 $4 -#define a1 $5 -#define a2 $6 -#define a3 $7 -#define t0 $8 -#define t1 $9 -#define t2 $10 -#define t3 $11 -#define t4 $12 -#define t5 $13 -#define t6 $14 -#define t7 $15 -#define s0 $16 -#define s1 $17 -#define s2 $18 -#define s3 $19 -#define s4 $20 -#define s5 $21 -#define s6 $22 -#define s7 $23 -#define t8 $24 -#define t9 $25 -#define k0 $26 -#define k1 $27 -#define gp $28 -#define sp $29 -#define fp $30 -#define s8 $30 -#define ra $31 - -/* - * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 - */ -#define LEAF_MIPS32R2(symbol) \ - .globl symbol; \ - .align 2; \ - .hidden symbol; \ - .type symbol, @function; \ - .ent symbol, 0; \ -symbol: .frame sp, 0, ra; \ - .set push; \ - .set arch=mips32r2; \ - .set noreorder; \ - .set noat; - -/* - * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2 - */ -#define LEAF_MIPS_DSPR2(symbol) \ -LEAF_MIPS32R2(symbol) \ - .set dspr2; - -/* - * END - mark end of function - */ -#define END(function) \ - .set pop; \ - .end function; \ - .size function,.-function - -/* - * Checks if stack offset is big enough for storing/restoring regs_num - * number of register to/from stack. Stack offset must be greater than - * or equal to the number of bytes needed for storing registers (regs_num*4). - * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is - * preserved for input arguments of the functions, already stored in a0-a3), - * stack size can be further optimized by utilizing this space. - */ -.macro CHECK_STACK_OFFSET regs_num, stack_offset -.if \stack_offset < \regs_num * 4 - 16 -.error "Stack offset too small." -.endif -.endm - -/* - * Saves set of registers on stack. Maximum number of registers that - * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). - * Stack offset is number of bytes that are added to stack pointer (sp) - * before registers are pushed in order to provide enough space on stack - * (offset must be multiple of 4, and must be big enough, as described by - * CHECK_STACK_OFFSET macro). This macro is intended to be used in - * combination with RESTORE_REGS_FROM_STACK macro. Example: - * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 - * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 - */ -.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ - r2 = 0, r3 = 0, r4 = 0, \ - r5 = 0, r6 = 0, r7 = 0, \ - r8 = 0, r9 = 0, r10 = 0, \ - r11 = 0, r12 = 0, r13 = 0, \ - r14 = 0 - .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) - .error "Stack offset must be pozitive and multiple of 4." - .endif - .if \stack_offset != 0 - addiu sp, sp, -\stack_offset - .endif - sw \r1, 0(sp) - .if \r2 != 0 - sw \r2, 4(sp) - .endif - .if \r3 != 0 - sw \r3, 8(sp) - .endif - .if \r4 != 0 - sw \r4, 12(sp) - .endif - .if \r5 != 0 - CHECK_STACK_OFFSET 5, \stack_offset - sw \r5, 16(sp) - .endif - .if \r6 != 0 - CHECK_STACK_OFFSET 6, \stack_offset - sw \r6, 20(sp) - .endif - .if \r7 != 0 - CHECK_STACK_OFFSET 7, \stack_offset - sw \r7, 24(sp) - .endif - .if \r8 != 0 - CHECK_STACK_OFFSET 8, \stack_offset - sw \r8, 28(sp) - .endif - .if \r9 != 0 - CHECK_STACK_OFFSET 9, \stack_offset - sw \r9, 32(sp) - .endif - .if \r10 != 0 - CHECK_STACK_OFFSET 10, \stack_offset - sw \r10, 36(sp) - .endif - .if \r11 != 0 - CHECK_STACK_OFFSET 11, \stack_offset - sw \r11, 40(sp) - .endif - .if \r12 != 0 - CHECK_STACK_OFFSET 12, \stack_offset - sw \r12, 44(sp) - .endif - .if \r13 != 0 - CHECK_STACK_OFFSET 13, \stack_offset - sw \r13, 48(sp) - .endif - .if \r14 != 0 - CHECK_STACK_OFFSET 14, \stack_offset - sw \r14, 52(sp) - .endif -.endm - -/* - * Restores set of registers from stack. Maximum number of registers that - * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). - * Stack offset is number of bytes that are added to stack pointer (sp) - * after registers are restored (offset must be multiple of 4, and must - * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is - * intended to be used in combination with RESTORE_REGS_FROM_STACK macro. - * Example: - * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 - * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 - */ -.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ - r2 = 0, r3 = 0, r4 = 0, \ - r5 = 0, r6 = 0, r7 = 0, \ - r8 = 0, r9 = 0, r10 = 0, \ - r11 = 0, r12 = 0, r13 = 0, \ - r14 = 0 - .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) - .error "Stack offset must be pozitive and multiple of 4." - .endif - lw \r1, 0(sp) - .if \r2 != 0 - lw \r2, 4(sp) - .endif - .if \r3 != 0 - lw \r3, 8(sp) - .endif - .if \r4 != 0 - lw \r4, 12(sp) - .endif - .if \r5 != 0 - CHECK_STACK_OFFSET 5, \stack_offset - lw \r5, 16(sp) - .endif - .if \r6 != 0 - CHECK_STACK_OFFSET 6, \stack_offset - lw \r6, 20(sp) - .endif - .if \r7 != 0 - CHECK_STACK_OFFSET 7, \stack_offset - lw \r7, 24(sp) - .endif - .if \r8 != 0 - CHECK_STACK_OFFSET 8, \stack_offset - lw \r8, 28(sp) - .endif - .if \r9 != 0 - CHECK_STACK_OFFSET 9, \stack_offset - lw \r9, 32(sp) - .endif - .if \r10 != 0 - CHECK_STACK_OFFSET 10, \stack_offset - lw \r10, 36(sp) - .endif - .if \r11 != 0 - CHECK_STACK_OFFSET 11, \stack_offset - lw \r11, 40(sp) - .endif - .if \r12 != 0 - CHECK_STACK_OFFSET 12, \stack_offset - lw \r12, 44(sp) - .endif - .if \r13 != 0 - CHECK_STACK_OFFSET 13, \stack_offset - lw \r13, 48(sp) - .endif - .if \r14 != 0 - CHECK_STACK_OFFSET 14, \stack_offset - lw \r14, 52(sp) - .endif - .if \stack_offset != 0 - addiu sp, sp, \stack_offset - .endif -.endm - -/* - * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel - * returned in (out_8888) register. Requires two temporary registers - * (scratch1 and scratch2). - */ -.macro CONVERT_1x0565_TO_1x8888 in_565, \ - out_8888, \ - scratch1, scratch2 - lui \out_8888, 0xff00 - sll \scratch1, \in_565, 0x3 - andi \scratch2, \scratch1, 0xff - ext \scratch1, \in_565, 0x2, 0x3 - or \scratch1, \scratch2, \scratch1 - or \out_8888, \out_8888, \scratch1 - - sll \scratch1, \in_565, 0x5 - andi \scratch1, \scratch1, 0xfc00 - srl \scratch2, \in_565, 0x1 - andi \scratch2, \scratch2, 0x300 - or \scratch2, \scratch1, \scratch2 - or \out_8888, \out_8888, \scratch2 - - andi \scratch1, \in_565, 0xf800 - srl \scratch2, \scratch1, 0x5 - andi \scratch2, \scratch2, 0xff00 - or \scratch1, \scratch1, \scratch2 - sll \scratch1, \scratch1, 0x8 - or \out_8888, \out_8888, \scratch1 -.endm - -/* - * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels - * returned in (out1_8888 and out2_8888) registers. Requires four scratch - * registers (scratch1 ... scratch4). It also requires maskG and maskB for - * color component extractions. These masks must have following values: - * li maskG, 0x07e007e0 - * li maskB, 0x001F001F - */ -.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565, \ - out1_8888, out2_8888, \ - maskG, maskB, \ - scratch1, scratch2, scratch3, scratch4 - sll \scratch1, \in1_565, 16 - or \scratch1, \scratch1, \in2_565 - lui \out2_8888, 0xff00 - ori \out2_8888, \out2_8888, 0xff00 - shrl.ph \scratch2, \scratch1, 11 - and \scratch3, \scratch1, \maskG - shra.ph \scratch4, \scratch2, 2 - shll.ph \scratch2, \scratch2, 3 - shll.ph \scratch3, \scratch3, 5 - or \scratch2, \scratch2, \scratch4 - shrl.qb \scratch4, \scratch3, 6 - or \out2_8888, \out2_8888, \scratch2 - or \scratch3, \scratch3, \scratch4 - and \scratch1, \scratch1, \maskB - shll.ph \scratch2, \scratch1, 3 - shra.ph \scratch4, \scratch1, 2 - or \scratch2, \scratch2, \scratch4 - or \scratch3, \scratch2, \scratch3 - precrq.ph.w \out1_8888, \out2_8888, \scratch3 - precr_sra.ph.w \out2_8888, \scratch3, 0 -.endm - -/* - * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel - * returned in (out_565) register. Requires two temporary registers - * (scratch1 and scratch2). - */ -.macro CONVERT_1x8888_TO_1x0565 in_8888, \ - out_565, \ - scratch1, scratch2 - ext \out_565, \in_8888, 0x3, 0x5 - srl \scratch1, \in_8888, 0x5 - andi \scratch1, \scratch1, 0x07e0 - srl \scratch2, \in_8888, 0x8 - andi \scratch2, \scratch2, 0xf800 - or \out_565, \out_565, \scratch1 - or \out_565, \out_565, \scratch2 -.endm - -/* - * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5 - * pixels returned in (out1_565 and out2_565) registers. Requires two temporary - * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB - * for color component extractions. These masks must have following values: - * li maskR, 0xf800f800 - * li maskG, 0x07e007e0 - * li maskB, 0x001F001F - * Value of input register in2_8888 is lost. - */ -.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888, \ - out1_565, out2_565, \ - maskR, maskG, maskB, \ - scratch1, scratch2 - precr.qb.ph \scratch1, \in2_8888, \in1_8888 - precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 - and \out1_565, \scratch1, \maskR - shrl.ph \scratch1, \scratch1, 3 - shll.ph \in2_8888, \in2_8888, 3 - and \scratch1, \scratch1, \maskB - or \out1_565, \out1_565, \scratch1 - and \in2_8888, \in2_8888, \maskG - or \out1_565, \out1_565, \in2_8888 - srl \out2_565, \out1_565, 16 -.endm - -/* - * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed - * for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro MIPS_UN8x4_MUL_UN8 s_8888, \ - m_8, \ - d_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3 - replv.ph \m_8, \m_8 /* 0 | M | 0 | M */ - muleu_s.ph.qbl \scratch1, \s_8888, \m_8 /* A*M | R*M */ - muleu_s.ph.qbr \scratch2, \s_8888, \m_8 /* G*M | B*M */ - shra_r.ph \scratch3, \scratch1, 8 - shra_r.ph \d_8888, \scratch2, 8 - and \scratch3, \scratch3, \maskLSR /* 0 |A*M| 0 |R*M */ - and \d_8888, \d_8888, \maskLSR /* 0 |G*M| 0 |B*M */ - addq.ph \scratch1, \scratch1, \scratch3 /* A*M+A*M | R*M+R*M */ - addq.ph \scratch2, \scratch2, \d_8888 /* G*M+G*M | B*M+B*M */ - shra_r.ph \scratch1, \scratch1, 8 - shra_r.ph \scratch2, \scratch2, 8 - precr.qb.ph \d_8888, \scratch1, \scratch2 -.endm - -/* - * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR - * needed for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \ - s2_8888, \ - m1_8, \ - m2_8, \ - d1_8888, \ - d2_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, \ - scratch4, scratch5, scratch6 - replv.ph \m1_8, \m1_8 /* 0 | M1 | 0 | M1 */ - replv.ph \m2_8, \m2_8 /* 0 | M2 | 0 | M2 */ - muleu_s.ph.qbl \scratch1, \s1_8888, \m1_8 /* A1*M1 | R1*M1 */ - muleu_s.ph.qbr \scratch2, \s1_8888, \m1_8 /* G1*M1 | B1*M1 */ - muleu_s.ph.qbl \scratch3, \s2_8888, \m2_8 /* A2*M2 | R2*M2 */ - muleu_s.ph.qbr \scratch4, \s2_8888, \m2_8 /* G2*M2 | B2*M2 */ - shra_r.ph \scratch5, \scratch1, 8 - shra_r.ph \d1_8888, \scratch2, 8 - shra_r.ph \scratch6, \scratch3, 8 - shra_r.ph \d2_8888, \scratch4, 8 - and \scratch5, \scratch5, \maskLSR /* 0 |A1*M1| 0 |R1*M1 */ - and \d1_8888, \d1_8888, \maskLSR /* 0 |G1*M1| 0 |B1*M1 */ - and \scratch6, \scratch6, \maskLSR /* 0 |A2*M2| 0 |R2*M2 */ - and \d2_8888, \d2_8888, \maskLSR /* 0 |G2*M2| 0 |B2*M2 */ - addq.ph \scratch1, \scratch1, \scratch5 - addq.ph \scratch2, \scratch2, \d1_8888 - addq.ph \scratch3, \scratch3, \scratch6 - addq.ph \scratch4, \scratch4, \d2_8888 - shra_r.ph \scratch1, \scratch1, 8 - shra_r.ph \scratch2, \scratch2, 8 - shra_r.ph \scratch3, \scratch3, 8 - shra_r.ph \scratch4, \scratch4, 8 - precr.qb.ph \d1_8888, \scratch1, \scratch2 - precr.qb.ph \d2_8888, \scratch3, \scratch4 -.endm - -/* - * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR - * needed for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro MIPS_UN8x4_MUL_UN8x4 s_8888, \ - m_8888, \ - d_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, scratch4 - preceu.ph.qbl \scratch1, \m_8888 /* 0 | A | 0 | R */ - preceu.ph.qbr \scratch2, \m_8888 /* 0 | G | 0 | B */ - muleu_s.ph.qbl \scratch3, \s_8888, \scratch1 /* A*A | R*R */ - muleu_s.ph.qbr \scratch4, \s_8888, \scratch2 /* G*G | B*B */ - shra_r.ph \scratch1, \scratch3, 8 - shra_r.ph \scratch2, \scratch4, 8 - and \scratch1, \scratch1, \maskLSR /* 0 |A*A| 0 |R*R */ - and \scratch2, \scratch2, \maskLSR /* 0 |G*G| 0 |B*B */ - addq.ph \scratch1, \scratch1, \scratch3 - addq.ph \scratch2, \scratch2, \scratch4 - shra_r.ph \scratch1, \scratch1, 8 - shra_r.ph \scratch2, \scratch2, 8 - precr.qb.ph \d_8888, \scratch1, \scratch2 -.endm - -/* - * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires - * maskLSR needed for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ - -.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \ - s2_8888, \ - m1_8888, \ - m2_8888, \ - d1_8888, \ - d2_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, \ - scratch4, scratch5, scratch6 - preceu.ph.qbl \scratch1, \m1_8888 /* 0 | A | 0 | R */ - preceu.ph.qbr \scratch2, \m1_8888 /* 0 | G | 0 | B */ - preceu.ph.qbl \scratch3, \m2_8888 /* 0 | A | 0 | R */ - preceu.ph.qbr \scratch4, \m2_8888 /* 0 | G | 0 | B */ - muleu_s.ph.qbl \scratch5, \s1_8888, \scratch1 /* A*A | R*R */ - muleu_s.ph.qbr \scratch6, \s1_8888, \scratch2 /* G*G | B*B */ - muleu_s.ph.qbl \scratch1, \s2_8888, \scratch3 /* A*A | R*R */ - muleu_s.ph.qbr \scratch2, \s2_8888, \scratch4 /* G*G | B*B */ - shra_r.ph \scratch3, \scratch5, 8 - shra_r.ph \scratch4, \scratch6, 8 - shra_r.ph \d1_8888, \scratch1, 8 - shra_r.ph \d2_8888, \scratch2, 8 - and \scratch3, \scratch3, \maskLSR /* 0 |A*A| 0 |R*R */ - and \scratch4, \scratch4, \maskLSR /* 0 |G*G| 0 |B*B */ - and \d1_8888, \d1_8888, \maskLSR /* 0 |A*A| 0 |R*R */ - and \d2_8888, \d2_8888, \maskLSR /* 0 |G*G| 0 |B*B */ - addq.ph \scratch3, \scratch3, \scratch5 - addq.ph \scratch4, \scratch4, \scratch6 - addq.ph \d1_8888, \d1_8888, \scratch1 - addq.ph \d2_8888, \d2_8888, \scratch2 - shra_r.ph \scratch3, \scratch3, 8 - shra_r.ph \scratch4, \scratch4, 8 - shra_r.ph \scratch5, \d1_8888, 8 - shra_r.ph \scratch6, \d2_8888, 8 - precr.qb.ph \d1_8888, \scratch3, \scratch4 - precr.qb.ph \d2_8888, \scratch5, \scratch6 -.endm - -/* - * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 - * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR - * needed for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro OVER_8888_8_8888 s_8888, \ - m_8, \ - d_8888, \ - out_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, scratch4 - MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ - \scratch1, \maskLSR, \ - \scratch2, \scratch3, \scratch4 - - not \scratch2, \scratch1 - srl \scratch2, \scratch2, 24 - - MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \ - \d_8888, \maskLSR, \ - \scratch3, \scratch4, \out_8888 - - addu_s.qb \out_8888, \d_8888, \scratch1 -.endm - -/* - * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two - * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and - * m2_8). It also requires maskLSR needed for rounding process. maskLSR must - * have following value: - * li maskLSR, 0x00ff00ff - */ -.macro OVER_2x8888_2x8_2x8888 s1_8888, \ - s2_8888, \ - m1_8, \ - m2_8, \ - d1_8888, \ - d2_8888, \ - out1_8888, \ - out2_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, \ - scratch4, scratch5, scratch6 - MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \ - \m1_8, \m2_8, \ - \scratch1, \scratch2, \ - \maskLSR, \ - \scratch3, \scratch4, \out1_8888, \ - \out2_8888, \scratch5, \scratch6 - - not \scratch3, \scratch1 - srl \scratch3, \scratch3, 24 - not \scratch4, \scratch2 - srl \scratch4, \scratch4, 24 - - MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ - \scratch3, \scratch4, \ - \d1_8888, \d2_8888, \ - \maskLSR, \ - \scratch5, \scratch6, \out1_8888, \ - \out2_8888, \scratch3, \scratch4 - - addu_s.qb \out1_8888, \d1_8888, \scratch1 - addu_s.qb \out2_8888, \d2_8888, \scratch2 -.endm - -/* - * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 - * destination pixel (d_8888). It also requires maskLSR needed for rounding - * process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro OVER_8888_8888 s_8888, \ - d_8888, \ - out_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, scratch4 - not \scratch1, \s_8888 - srl \scratch1, \scratch1, 24 - - MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \ - \out_8888, \maskLSR, \ - \scratch2, \scratch3, \scratch4 - - addu_s.qb \out_8888, \out_8888, \s_8888 -.endm - -/* - * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two - * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR - * needed for rounding process. maskLSR must have following value: - * li maskLSR, 0x00ff00ff - */ -.macro OVER_2x8888_2x8888 s1_8888, \ - s2_8888, \ - d1_8888, \ - d2_8888, \ - out1_8888, \ - out2_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, \ - scratch4, scratch5, scratch6 - not \scratch1, \s1_8888 - srl \scratch1, \scratch1, 24 - not \scratch2, \s2_8888 - srl \scratch2, \scratch2, 24 - MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ - \scratch1, \scratch2, \ - \out1_8888, \out2_8888, \ - \maskLSR, \ - \scratch3, \scratch4, \scratch5, \ - \scratch6, \d1_8888, \d2_8888 - - addu_s.qb \out1_8888, \out1_8888, \s1_8888 - addu_s.qb \out2_8888, \out2_8888, \s2_8888 -.endm - -.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ - m_8, \ - d_8888, \ - out_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3 - MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ - \out_8888, \maskLSR, \ - \scratch1, \scratch2, \scratch3 - - addu_s.qb \out_8888, \out_8888, \d_8888 -.endm - -.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888, \ - s2_8888, \ - m1_8, \ - m2_8, \ - d1_8888, \ - d2_8888, \ - out1_8888, \ - out2_8888, \ - maskLSR, \ - scratch1, scratch2, scratch3, \ - scratch4, scratch5, scratch6 - MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \ - \m1_8, \m2_8, \ - \out1_8888, \out2_8888, \ - \maskLSR, \ - \scratch1, \scratch2, \scratch3, \ - \scratch4, \scratch5, \scratch6 - - addu_s.qb \out1_8888, \out1_8888, \d1_8888 - addu_s.qb \out2_8888, \out2_8888, \d2_8888 -.endm - -.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br, \ - scratch1, scratch2, \ - alpha, red, green, blue \ - wt1, wt2, wb1, wb2 - andi \scratch1, \tl, 0xff - andi \scratch2, \tr, 0xff - andi \alpha, \bl, 0xff - andi \red, \br, 0xff - - multu $ac0, \wt1, \scratch1 - maddu $ac0, \wt2, \scratch2 - maddu $ac0, \wb1, \alpha - maddu $ac0, \wb2, \red - - ext \scratch1, \tl, 8, 8 - ext \scratch2, \tr, 8, 8 - ext \alpha, \bl, 8, 8 - ext \red, \br, 8, 8 - - multu $ac1, \wt1, \scratch1 - maddu $ac1, \wt2, \scratch2 - maddu $ac1, \wb1, \alpha - maddu $ac1, \wb2, \red - - ext \scratch1, \tl, 16, 8 - ext \scratch2, \tr, 16, 8 - ext \alpha, \bl, 16, 8 - ext \red, \br, 16, 8 - - mflo \blue, $ac0 - - multu $ac2, \wt1, \scratch1 - maddu $ac2, \wt2, \scratch2 - maddu $ac2, \wb1, \alpha - maddu $ac2, \wb2, \red - - ext \scratch1, \tl, 24, 8 - ext \scratch2, \tr, 24, 8 - ext \alpha, \bl, 24, 8 - ext \red, \br, 24, 8 - - mflo \green, $ac1 - - multu $ac3, \wt1, \scratch1 - maddu $ac3, \wt2, \scratch2 - maddu $ac3, \wb1, \alpha - maddu $ac3, \wb2, \red - - mflo \red, $ac2 - mflo \alpha, $ac3 - - precr.qb.ph \alpha, \alpha, \red - precr.qb.ph \scratch1, \green, \blue - precrq.qb.ph \tl, \alpha, \scratch1 -.endm - -#endif //PIXMAN_MIPS_DSPR2_ASM_H diff --git a/vendor/pixman/pixman/pixman-mips-dspr2.c b/vendor/pixman/pixman/pixman-mips-dspr2.c deleted file mode 100644 index c43eb1e89..000000000 --- a/vendor/pixman/pixman/pixman-mips-dspr2.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" -#include "pixman-mips-dspr2.h" - -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888, - uint16_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, - uint8_t, 3, uint8_t, 3) -#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, - uint8_t, 3, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, - uint8_t, 3, uint16_t, 1) -#endif -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca, - uint32_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565, - uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888, - uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888, - uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8, - uint8_t, 1) - -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1, - uint8_t, 1, uint8_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1, - uint8_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1, - uint32_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1, - uint8_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1, - uint8_t, 1, uint32_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1, - uint8_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, - uint8_t, 1, uint16_t, 1) -PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, - uint32_t, 1, uint32_t, 1) - -PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, - uint32_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, - uint16_t, uint32_t) - -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, - uint32_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC, - uint16_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC, - uint16_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD, - uint32_t, uint32_t) - -PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565, - OVER, uint32_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565, - OVER, uint16_t, uint16_t) - -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, - uint32_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC, - uint32_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC, - uint16_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC, - uint16_t, uint16_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER, - uint32_t, uint32_t) -PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD, - uint32_t, uint32_t) - -static pixman_bool_t -mips_dspr2_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor) -{ - uint8_t *byte_line; - uint32_t byte_width; - switch (bpp) - { - case 16: - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); - byte_width = width * 2; - stride *= 2; - - while (height--) - { - uint8_t *dst = byte_line; - byte_line += stride; - pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff); - } - return TRUE; - case 32: - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); - byte_width = width * 4; - stride *= 4; - - while (height--) - { - uint8_t *dst = byte_line; - byte_line += stride; - pixman_fill_buff32_mips (dst, byte_width, _xor); - } - return TRUE; - default: - return FALSE; - } -} - -static pixman_bool_t -mips_dspr2_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - if (src_bpp != dst_bpp) - return FALSE; - - uint8_t *src_bytes; - uint8_t *dst_bytes; - uint32_t byte_width; - - switch (src_bpp) - { - case 16: - src_stride = src_stride * (int) sizeof (uint32_t) / 2; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; - src_bytes =(uint8_t *)(((uint16_t *)src_bits) - + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) - + dst_stride * (dest_y) + (dest_x)); - byte_width = width * 2; - src_stride *= 2; - dst_stride *= 2; - - while (height--) - { - uint8_t *src = src_bytes; - uint8_t *dst = dst_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - pixman_mips_fast_memcpy (dst, src, byte_width); - } - return TRUE; - case 32: - src_stride = src_stride * (int) sizeof (uint32_t) / 4; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; - src_bytes = (uint8_t *)(((uint32_t *)src_bits) - + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) - + dst_stride * (dest_y) + (dest_x)); - byte_width = width * 4; - src_stride *= 4; - dst_stride *= 4; - - while (height--) - { - uint8_t *src = src_bytes; - uint8_t *dst = dst_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - pixman_mips_fast_memcpy (dst, src, byte_width); - } - return TRUE; - default: - return FALSE; - } -} - -static const pixman_fast_path_t mips_dspr2_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, mips_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, mips_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, mips_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, mips_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), -#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), -#endif - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mips_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, mips_composite_src_n_8_8), - - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mips_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, mips_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, mips_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, mips_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, mips_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, mips_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, mips_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, mips_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, mips_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, mips_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, mips_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, mips_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mips_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, mips_composite_add_8_8_8), - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, mips_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, mips_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, mips_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, mips_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, mips_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, mips_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mips_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mips_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mips_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, mips_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, mips_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, mips_composite_out_reverse_8_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, mips_composite_out_reverse_8_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mips_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), - - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), - - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), - - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), - PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), - /* Note: NONE repeat is not supported yet */ - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), - - SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), - SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), - - SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565), - SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565), - - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), - { PIXMAN_OP_NONE }, -}; - -static void -mips_dspr2_combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - pixman_composite_over_8888_8888_8888_asm_mips ( - dest, (uint32_t *)src, (uint32_t *)mask, width); - else - pixman_composite_over_8888_8888_asm_mips ( - dest, (uint32_t *)src, width); -} - -pixman_implementation_t * -_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, mips_dspr2_fast_paths); - - imp->combine_32[PIXMAN_OP_OVER] = mips_dspr2_combine_over_u; - - imp->blt = mips_dspr2_blt; - imp->fill = mips_dspr2_fill; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-mips-dspr2.h b/vendor/pixman/pixman/pixman-mips-dspr2.h deleted file mode 100644 index 57b38359e..000000000 --- a/vendor/pixman/pixman/pixman-mips-dspr2.h +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) - */ - -#ifndef PIXMAN_MIPS_DSPR2_H -#define PIXMAN_MIPS_DSPR2_H - -#include "pixman-private.h" -#include "pixman-inlines.h" - -#define SKIP_ZERO_SRC 1 -#define SKIP_ZERO_MASK 2 -#define DO_FAST_MEMCPY 3 - -void -pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes); -void -pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value); -void -pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value); - -/****************************************************************/ - -#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_mips (dst_type *dst, \ - src_type *src, \ - int32_t w); \ - \ -static void \ -mips_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line, *dst; \ - src_type *src_line, *src; \ - int32_t dst_stride, src_stride; \ - int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; \ - \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - while (height--) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - src = src_line; \ - src_line += src_stride; \ - \ - if (flags == DO_FAST_MEMCPY) \ - pixman_mips_fast_memcpy (dst, src, width * bpp); \ - else \ - pixman_composite_##name##_asm_mips (dst, src, width); \ - } \ -} - -/****************************************************************/ - -#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_mips (dst_type *dst, \ - uint32_t src, \ - int32_t w); \ - \ -static void \ -mips_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line, *dst; \ - int32_t dst_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - while (height--) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - \ - pixman_composite_##name##_asm_mips (dst, src, width); \ - } \ -} - -/*******************************************************************/ - -#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_mips (dst_type *dst, \ - uint32_t src, \ - mask_type *mask, \ - int32_t w); \ - \ -static void \ -mips_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line, *dst; \ - mask_type *mask_line, *mask; \ - int32_t dst_stride, mask_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - while (height--) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - mask = mask_line; \ - mask_line += mask_stride; \ - pixman_composite_##name##_asm_mips (dst, src, mask, width); \ - } \ -} - -/*******************************************************************/ - -#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_mips (dst_type *dst, \ - src_type *src, \ - uint32_t mask, \ - int32_t w); \ - \ -static void \ -mips_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line, *dst; \ - src_type *src_line, *src; \ - int32_t dst_stride, src_stride; \ - uint32_t mask; \ - \ - mask = _pixman_image_get_solid ( \ - imp, mask_image, dest_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_MASK) && mask == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - \ - while (height--) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - src = src_line; \ - src_line += src_stride; \ - \ - pixman_composite_##name##_asm_mips (dst, src, mask, width); \ - } \ -} - -/************************************************************************/ - -#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_mips (dst_type *dst, \ - src_type *src, \ - mask_type *mask, \ - int32_t w); \ - \ -static void \ -mips_composite_##name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type *dst_line, *dst; \ - src_type *src_line, *src; \ - mask_type *mask_line, *mask; \ - int32_t dst_stride, src_stride, mask_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - while (height--) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - mask = mask_line; \ - mask_line += mask_stride; \ - src = src_line; \ - src_line += src_stride; \ - pixman_composite_##name##_asm_mips (dst, src, mask, width); \ - } \ -} - -/****************************************************************************/ - -#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ - dst_type * dst, \ - const src_type * src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x); \ - \ -static force_inline void \ -scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ - vx, unit_x); \ -} \ - \ -FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, dst_type, COVER) \ -FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, dst_type, NONE) \ -FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, dst_type, PAD) - -/* Provide entries for the fast path table */ -#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) - - -/*****************************************************************************/ - -#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ - dst_type * dst, \ - const src_type * src, \ - const uint8_t * mask, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x); \ - \ -static force_inline void \ -scaled_nearest_scanline_mips_##name##_##op (const uint8_t * mask, \ - dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, \ - mask, w, \ - vx, unit_x); \ -} \ - \ -FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ -FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ -FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \ - scaled_nearest_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, PAD, TRUE, FALSE) - -/****************************************************************************/ - -#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \ - dst_type * dst, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x); \ -static force_inline void \ -scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ - const uint32_t * mask, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \ - src_bottom, w, \ - wt, wb, \ - vx, unit_x); \ -} \ - \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint32_t, dst_type, NORMAL, \ - FLAG_NONE) - -/*****************************************************************************/ - -#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \ - dst_type * dst, \ - const uint8_t * mask, \ - const src_type * top, \ - const src_type * bottom, \ - int wt, \ - int wb, \ - pixman_fixed_t x, \ - pixman_fixed_t ux, \ - int width); \ - \ -static force_inline void \ -scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ - const uint8_t * mask, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \ - dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ -} \ - \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, COVER, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, NONE, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, PAD, \ - FLAG_HAVE_NON_SOLID_MASK) \ -FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ - scaled_bilinear_scanline_mips_##name##_##op, \ - src_type, uint8_t, dst_type, NORMAL, \ - FLAG_HAVE_NON_SOLID_MASK) - -#endif //PIXMAN_MIPS_DSPR2_H diff --git a/vendor/pixman/pixman/pixman-mips-memcpy-asm.S b/vendor/pixman/pixman/pixman-mips-memcpy-asm.S deleted file mode 100644 index 9ad6da537..000000000 --- a/vendor/pixman/pixman/pixman-mips-memcpy-asm.S +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "pixman-mips-dspr2-asm.h" - -/* - * This routine could be optimized for MIPS64. The current code only - * uses MIPS32 instructions. - */ - -#ifdef EB -# define LWHI lwl /* high part is left in big-endian */ -# define SWHI swl /* high part is left in big-endian */ -# define LWLO lwr /* low part is right in big-endian */ -# define SWLO swr /* low part is right in big-endian */ -#else -# define LWHI lwr /* high part is right in little-endian */ -# define SWHI swr /* high part is right in little-endian */ -# define LWLO lwl /* low part is left in big-endian */ -# define SWLO swl /* low part is left in big-endian */ -#endif - -LEAF_MIPS32R2(pixman_mips_fast_memcpy) - - slti AT, a2, 8 - bne AT, zero, $last8 - move v0, a0 /* memcpy returns the dst pointer */ - -/* Test if the src and dst are word-aligned, or can be made word-aligned */ - xor t8, a1, a0 - andi t8, t8, 0x3 /* t8 is a0/a1 word-displacement */ - - bne t8, zero, $unaligned - negu a3, a0 - - andi a3, a3, 0x3 /* we need to copy a3 bytes to make a0/a1 aligned */ - beq a3, zero, $chk16w /* when a3=0 then the dst (a0) is word-aligned */ - subu a2, a2, a3 /* now a2 is the remining bytes count */ - - LWHI t8, 0(a1) - addu a1, a1, a3 - SWHI t8, 0(a0) - addu a0, a0, a3 - -/* Now the dst/src are mutually word-aligned with word-aligned addresses */ -$chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */ - /* t8 is the byte count after 64-byte chunks */ - - beq a2, t8, $chk8w /* if a2==t8, no 64-byte chunks */ - /* There will be at most 1 32-byte chunk after it */ - subu a3, a2, t8 /* subtract from a2 the reminder */ - /* Here a3 counts bytes in 16w chunks */ - addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ - - addu t0, a0, a2 /* t0 is the "past the end" address */ - -/* - * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past - * the "t0-32" address - * This means: for x=128 the last "safe" a0 address is "t0-160" - * Alternatively, for x=64 the last "safe" a0 address is "t0-96" - * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit - */ - subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ - - pref 0, 0(a1) /* bring the first line of src, addr 0 */ - pref 0, 32(a1) /* bring the second line of src, addr 32 */ - pref 0, 64(a1) /* bring the third line of src, addr 64 */ - pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ -/* In case the a0 > t9 don't use "pref 30" at all */ - sgtu v1, a0, t9 - bgtz v1, $loop16w /* skip "pref 30, 64(a0)" for too short arrays */ - nop -/* otherwise, start with using pref30 */ - pref 30, 64(a0) -$loop16w: - pref 0, 96(a1) - lw t0, 0(a1) - bgtz v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */ - lw t1, 4(a1) - pref 30, 96(a0) /* continue setting up the dest, addr 96 */ -$skip_pref30_96: - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - pref 0, 128(a1) /* bring the next lines of src, addr 128 */ - - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - - lw t0, 32(a1) - bgtz v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */ - lw t1, 36(a1) - pref 30, 128(a0) /* continue setting up the dest, addr 128 */ -$skip_pref30_128: - lw t2, 40(a1) - lw t3, 44(a1) - lw t4, 48(a1) - lw t5, 52(a1) - lw t6, 56(a1) - lw t7, 60(a1) - pref 0, 160(a1) /* bring the next lines of src, addr 160 */ - - sw t0, 32(a0) - sw t1, 36(a0) - sw t2, 40(a0) - sw t3, 44(a0) - sw t4, 48(a0) - sw t5, 52(a0) - sw t6, 56(a0) - sw t7, 60(a0) - - addiu a0, a0, 64 /* adding 64 to dest */ - sgtu v1, a0, t9 - bne a0, a3, $loop16w - addiu a1, a1, 64 /* adding 64 to src */ - move a2, t8 - -/* Here we have src and dest word-aligned but less than 64-bytes to go */ - -$chk8w: - pref 0, 0x0(a1) - andi t8, a2, 0x1f /* is there a 32-byte chunk? */ - /* the t8 is the reminder count past 32-bytes */ - beq a2, t8, $chk1w /* when a2=t8, no 32-byte chunk */ - nop - - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - addiu a1, a1, 32 - - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - addiu a0, a0, 32 - -$chk1w: - andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ - beq a2, t8, $last8 - subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ - addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ - -/* copying in words (4-byte chunks) */ -$wordCopy_loop: - lw t3, 0(a1) /* the first t3 may be equal t0 ... optimize? */ - addiu a1, a1, 4 - addiu a0, a0, 4 - bne a0, a3, $wordCopy_loop - sw t3, -4(a0) - -/* For the last (<8) bytes */ -$last8: - blez a2, leave - addu a3, a0, a2 /* a3 is the last dst address */ -$last8loop: - lb v1, 0(a1) - addiu a1, a1, 1 - addiu a0, a0, 1 - bne a0, a3, $last8loop - sb v1, -1(a0) - -leave: j ra - nop - -/* - * UNALIGNED case - */ - -$unaligned: - /* got here with a3="negu a0" */ - andi a3, a3, 0x3 /* test if the a0 is word aligned */ - beqz a3, $ua_chk16w - subu a2, a2, a3 /* bytes left after initial a3 bytes */ - - LWHI v1, 0(a1) - LWLO v1, 3(a1) - addu a1, a1, a3 /* a3 may be here 1, 2 or 3 */ - SWHI v1, 0(a0) - addu a0, a0, a3 /* below the dst will be word aligned (NOTE1) */ - -$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */ - /* t8 is the byte count after 64-byte chunks */ - beq a2, t8, $ua_chk8w /* if a2==t8, no 64-byte chunks */ - /* There will be at most 1 32-byte chunk after it */ - subu a3, a2, t8 /* subtract from a2 the reminder */ - /* Here a3 counts bytes in 16w chunks */ - addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ - - addu t0, a0, a2 /* t0 is the "past the end" address */ - - subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ - - pref 0, 0(a1) /* bring the first line of src, addr 0 */ - pref 0, 32(a1) /* bring the second line of src, addr 32 */ - pref 0, 64(a1) /* bring the third line of src, addr 64 */ - pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ -/* In case the a0 > t9 don't use "pref 30" at all */ - sgtu v1, a0, t9 - bgtz v1, $ua_loop16w /* skip "pref 30, 64(a0)" for too short arrays */ - nop -/* otherwise, start with using pref30 */ - pref 30, 64(a0) -$ua_loop16w: - pref 0, 96(a1) - LWHI t0, 0(a1) - LWLO t0, 3(a1) - LWHI t1, 4(a1) - bgtz v1, $ua_skip_pref30_96 - LWLO t1, 7(a1) - pref 30, 96(a0) /* continue setting up the dest, addr 96 */ -$ua_skip_pref30_96: - LWHI t2, 8(a1) - LWLO t2, 11(a1) - LWHI t3, 12(a1) - LWLO t3, 15(a1) - LWHI t4, 16(a1) - LWLO t4, 19(a1) - LWHI t5, 20(a1) - LWLO t5, 23(a1) - LWHI t6, 24(a1) - LWLO t6, 27(a1) - LWHI t7, 28(a1) - LWLO t7, 31(a1) - pref 0, 128(a1) /* bring the next lines of src, addr 128 */ - - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - - LWHI t0, 32(a1) - LWLO t0, 35(a1) - LWHI t1, 36(a1) - bgtz v1, $ua_skip_pref30_128 - LWLO t1, 39(a1) - pref 30, 128(a0) /* continue setting up the dest, addr 128 */ -$ua_skip_pref30_128: - LWHI t2, 40(a1) - LWLO t2, 43(a1) - LWHI t3, 44(a1) - LWLO t3, 47(a1) - LWHI t4, 48(a1) - LWLO t4, 51(a1) - LWHI t5, 52(a1) - LWLO t5, 55(a1) - LWHI t6, 56(a1) - LWLO t6, 59(a1) - LWHI t7, 60(a1) - LWLO t7, 63(a1) - pref 0, 160(a1) /* bring the next lines of src, addr 160 */ - - sw t0, 32(a0) - sw t1, 36(a0) - sw t2, 40(a0) - sw t3, 44(a0) - sw t4, 48(a0) - sw t5, 52(a0) - sw t6, 56(a0) - sw t7, 60(a0) - - addiu a0, a0, 64 /* adding 64 to dest */ - sgtu v1, a0, t9 - bne a0, a3, $ua_loop16w - addiu a1, a1, 64 /* adding 64 to src */ - move a2, t8 - -/* Here we have src and dest word-aligned but less than 64-bytes to go */ - -$ua_chk8w: - pref 0, 0x0(a1) - andi t8, a2, 0x1f /* is there a 32-byte chunk? */ - /* the t8 is the reminder count */ - beq a2, t8, $ua_chk1w /* when a2=t8, no 32-byte chunk */ - - LWHI t0, 0(a1) - LWLO t0, 3(a1) - LWHI t1, 4(a1) - LWLO t1, 7(a1) - LWHI t2, 8(a1) - LWLO t2, 11(a1) - LWHI t3, 12(a1) - LWLO t3, 15(a1) - LWHI t4, 16(a1) - LWLO t4, 19(a1) - LWHI t5, 20(a1) - LWLO t5, 23(a1) - LWHI t6, 24(a1) - LWLO t6, 27(a1) - LWHI t7, 28(a1) - LWLO t7, 31(a1) - addiu a1, a1, 32 - - sw t0, 0(a0) - sw t1, 4(a0) - sw t2, 8(a0) - sw t3, 12(a0) - sw t4, 16(a0) - sw t5, 20(a0) - sw t6, 24(a0) - sw t7, 28(a0) - addiu a0, a0, 32 - -$ua_chk1w: - andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ - beq a2, t8, $ua_smallCopy - subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ - addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ - -/* copying in words (4-byte chunks) */ -$ua_wordCopy_loop: - LWHI v1, 0(a1) - LWLO v1, 3(a1) - addiu a1, a1, 4 - addiu a0, a0, 4 /* note: dst=a0 is word aligned here, see NOTE1 */ - bne a0, a3, $ua_wordCopy_loop - sw v1, -4(a0) - -/* Now less than 4 bytes (value in a2) left to copy */ -$ua_smallCopy: - beqz a2, leave - addu a3, a0, a2 /* a3 is the last dst address */ -$ua_smallCopy_loop: - lb v1, 0(a1) - addiu a1, a1, 1 - addiu a0, a0, 1 - bne a0, a3, $ua_smallCopy_loop - sb v1, -1(a0) - - j ra - nop - -END(pixman_mips_fast_memcpy) diff --git a/vendor/pixman/pixman/pixman-mips.c b/vendor/pixman/pixman/pixman-mips.c deleted file mode 100644 index 7479a0884..000000000 --- a/vendor/pixman/pixman/pixman-mips.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) - -#include -#include - -static pixman_bool_t -have_feature (const char *search_string) -{ -#if defined (__linux__) /* linux ELF */ - /* Simple detection of MIPS features at runtime for Linux. - * It is based on /proc/cpuinfo, which reveals hardware configuration - * to user-space applications. According to MIPS (early 2010), no similar - * facility is universally available on the MIPS architectures, so it's up - * to individual OSes to provide such. - */ - const char *file_name = "/proc/cpuinfo"; - char cpuinfo_line[256]; - FILE *f = NULL; - - if ((f = fopen (file_name, "r")) == NULL) - return FALSE; - - while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) - { - if (strstr (cpuinfo_line, search_string) != NULL) - { - fclose (f); - return TRUE; - } - } - - fclose (f); -#endif - - /* Did not find string in the proc file, or not Linux ELF. */ - return FALSE; -} - -#endif - -pixman_implementation_t * -_pixman_mips_get_implementations (pixman_implementation_t *imp) -{ -#ifdef USE_LOONGSON_MMI - /* I really don't know if some Loongson CPUs don't have MMI. */ - if (!_pixman_disabled ("loongson-mmi") && have_feature ("Loongson")) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_MIPS_DSPR2 - if (!_pixman_disabled ("mips-dspr2")) - { - int already_compiling_everything_for_dspr2 = 0; -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) - already_compiling_everything_for_dspr2 = 1; -#endif - if (already_compiling_everything_for_dspr2 || - /* Only currently available MIPS core that supports DSPr2 is 74K. */ - have_feature ("MIPS 74K")) - { - imp = _pixman_implementation_create_mips_dspr2 (imp); - } - } -#endif - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-mmx.c b/vendor/pixman/pixman/pixman-mmx.c deleted file mode 100644 index 3a85616b7..000000000 --- a/vendor/pixman/pixman/pixman-mmx.c +++ /dev/null @@ -1,4153 +0,0 @@ -/* - * Copyright © 2004, 2005 Red Hat, Inc. - * Copyright © 2004 Nicholas Miell - * Copyright © 2005 Trolltech AS - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Søren Sandmann (sandmann@redhat.com) - * Minor Improvements: Nicholas Miell (nmiell@gmail.com) - * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com) - * - * Based on work by Owen Taylor - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI - -#ifdef USE_LOONGSON_MMI -#include -#else -#include -#endif -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" - -#ifdef VERBOSE -#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__) -#else -#define CHECKPOINT() -#endif - -#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8 -/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */ -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_empty (void) -{ - -} -#endif - -#ifdef USE_X86_MMX -# if (defined(__SSE2__) || defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64)) -# include -# else -/* We have to compile with -msse to use xmmintrin.h, but that causes SSE - * instructions to be generated that we don't want. Just duplicate the - * functions we want to use. */ -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_movemask_pi8 (__m64 __A) -{ - int ret; - - asm ("pmovmskb %1, %0\n\t" - : "=r" (ret) - : "y" (__A) - ); - - return ret; -} - -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mulhi_pu16 (__m64 __A, __m64 __B) -{ - asm ("pmulhuw %1, %0\n\t" - : "+y" (__A) - : "y" (__B) - ); - return __A; -} - -# define _mm_shuffle_pi16(A, N) \ - ({ \ - __m64 ret; \ - \ - asm ("pshufw %2, %1, %0\n\t" \ - : "=y" (ret) \ - : "y" (A), "K" ((const int8_t)N) \ - ); \ - \ - ret; \ - }) -# endif -#endif - -#ifndef _MSC_VER -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ - (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) -#endif - -/* Notes about writing mmx code - * - * give memory operands as the second operand. If you give it as the - * first, gcc will first load it into a register, then use that - * register - * - * ie. use - * - * _mm_mullo_pi16 (x, mmx_constant); - * - * not - * - * _mm_mullo_pi16 (mmx_constant, x); - * - * Also try to minimize dependencies. i.e. when you need a value, try - * to calculate it from a value that was calculated as early as - * possible. - */ - -/* --------------- MMX primitives ------------------------------------- */ - -/* If __m64 is defined as a struct or union, then define M64_MEMBER to be - * the name of the member used to access the data. - * If __m64 requires using mm_cvt* intrinsics functions to convert between - * uint64_t and __m64 values, then define USE_CVT_INTRINSICS. - * If __m64 and uint64_t values can just be cast to each other directly, - * then define USE_M64_CASTS. - * If __m64 is a double datatype, then define USE_M64_DOUBLE. - */ -#ifdef _MSC_VER -# define M64_MEMBER m64_u64 -#elif defined(__ICC) -# define USE_CVT_INTRINSICS -#elif defined(USE_LOONGSON_MMI) -# define USE_M64_DOUBLE -#elif defined(__GNUC__) -# define USE_M64_CASTS -#elif defined(__SUNPRO_C) -# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__) -/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__) - * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__ - * is defined. If it is used, then the mm_cvt* intrinsics must be used. - */ -# define USE_CVT_INTRINSICS -# else -/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is - * disabled, __m64 is defined as a struct containing "unsigned long long l_". - */ -# define M64_MEMBER l_ -# endif -#endif - -#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE) -typedef uint64_t mmxdatafield; -#else -typedef __m64 mmxdatafield; -#endif - -typedef struct -{ - mmxdatafield mmx_4x00ff; - mmxdatafield mmx_4x0080; - mmxdatafield mmx_565_rgb; - mmxdatafield mmx_565_unpack_multiplier; - mmxdatafield mmx_565_pack_multiplier; - mmxdatafield mmx_565_r; - mmxdatafield mmx_565_g; - mmxdatafield mmx_565_b; - mmxdatafield mmx_packed_565_rb; - mmxdatafield mmx_packed_565_g; - mmxdatafield mmx_expand_565_g; - mmxdatafield mmx_expand_565_b; - mmxdatafield mmx_expand_565_r; -#ifndef USE_LOONGSON_MMI - mmxdatafield mmx_mask_0; - mmxdatafield mmx_mask_1; - mmxdatafield mmx_mask_2; - mmxdatafield mmx_mask_3; -#endif - mmxdatafield mmx_full_alpha; - mmxdatafield mmx_4x0101; - mmxdatafield mmx_ff000000; -} mmx_data_t; - -#if defined(_MSC_VER) -# define MMXDATA_INIT(field, val) { val ## UI64 } -#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */ -# define MMXDATA_INIT(field, val) field = { val ## ULL } -#else /* mmxdatafield is an integral type */ -# define MMXDATA_INIT(field, val) field = val ## ULL -#endif - -static const mmx_data_t c = -{ - MMXDATA_INIT (.mmx_4x00ff, 0x00ff00ff00ff00ff), - MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080), - MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f), - MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840), - MMXDATA_INIT (.mmx_565_pack_multiplier, 0x2000000420000004), - MMXDATA_INIT (.mmx_565_r, 0x000000f800000000), - MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000), - MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8), - MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8), - MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00), - MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0), - MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f), - MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800), -#ifndef USE_LOONGSON_MMI - MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000), - MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff), - MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff), - MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff), -#endif - MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000), - MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101), - MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000), -}; - -#ifdef USE_CVT_INTRINSICS -# define MC(x) to_m64 (c.mmx_ ## x) -#elif defined(USE_M64_CASTS) -# define MC(x) ((__m64)c.mmx_ ## x) -#elif defined(USE_M64_DOUBLE) -# define MC(x) (*(__m64 *)&c.mmx_ ## x) -#else -# define MC(x) c.mmx_ ## x -#endif - -static force_inline __m64 -to_m64 (uint64_t x) -{ -#ifdef USE_CVT_INTRINSICS - return _mm_cvtsi64_m64 (x); -#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ - __m64 res; - - res.M64_MEMBER = x; - return res; -#elif defined USE_M64_DOUBLE - return *(__m64 *)&x; -#else /* USE_M64_CASTS */ - return (__m64)x; -#endif -} - -static force_inline uint64_t -to_uint64 (__m64 x) -{ -#ifdef USE_CVT_INTRINSICS - return _mm_cvtm64_si64 (x); -#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ - uint64_t res = x.M64_MEMBER; - return res; -#elif defined USE_M64_DOUBLE - return *(uint64_t *)&x; -#else /* USE_M64_CASTS */ - return (uint64_t)x; -#endif -} - -static force_inline __m64 -shift (__m64 v, - int s) -{ - if (s > 0) - return _mm_slli_si64 (v, s); - else if (s < 0) - return _mm_srli_si64 (v, -s); - else - return v; -} - -static force_inline __m64 -negate (__m64 mask) -{ - return _mm_xor_si64 (mask, MC (4x00ff)); -} - -/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 - * and maps its result to the same range. - * - * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: - * Notation, Notation, Notation", the first of which is - * - * prod(a, b) = (a * b + 128) / 255. - * - * By approximating the division by 255 as 257/65536 it can be replaced by a - * multiply and a right shift. This is the implementation that we use in - * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended - * 3DNow!, and unavailable at the time of the book's publication) to perform - * the multiplication and right shift in a single operation. - * - * prod(a, b) = ((a * b + 128) * 257) >> 16. - * - * A third way (how pix_multiply() was implemented prior to 14208344) exists - * also that performs the multiplication by 257 with adds and shifts. - * - * Where temp = a * b + 128 - * - * prod(a, b) = (temp + (temp >> 8)) >> 8. - */ -static force_inline __m64 -pix_multiply (__m64 a, __m64 b) -{ - __m64 res; - - res = _mm_mullo_pi16 (a, b); - res = _mm_adds_pu16 (res, MC (4x0080)); - res = _mm_mulhi_pu16 (res, MC (4x0101)); - - return res; -} - -static force_inline __m64 -pix_add (__m64 a, __m64 b) -{ - return _mm_adds_pu8 (a, b); -} - -static force_inline __m64 -expand_alpha (__m64 pixel) -{ - return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3)); -} - -static force_inline __m64 -expand_alpha_rev (__m64 pixel) -{ - return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0)); -} - -static force_inline __m64 -invert_colors (__m64 pixel) -{ - return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2)); -} - -static force_inline __m64 -over (__m64 src, - __m64 srca, - __m64 dest) -{ - return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca))); -} - -static force_inline __m64 -over_rev_non_pre (__m64 src, __m64 dest) -{ - __m64 srca = expand_alpha (src); - __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha)); - - return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest); -} - -static force_inline __m64 -in (__m64 src, __m64 mask) -{ - return pix_multiply (src, mask); -} - -#ifndef _MSC_VER -static force_inline __m64 -in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) -{ - return over (in (src, mask), pix_multiply (srca, mask), dest); -} - -#else - -#define in_over(src, srca, mask, dest) \ - over (in (src, mask), pix_multiply (srca, mask), dest) - -#endif - -/* Elemental unaligned loads */ - -static force_inline __m64 ldq_u(__m64 *p) -{ -#ifdef USE_X86_MMX - /* x86's alignment restrictions are very relaxed, but that's no excuse */ - __m64 r; - memcpy(&r, p, sizeof(__m64)); - return r; -#elif defined USE_ARM_IWMMXT - int align = (uintptr_t)p & 7; - __m64 *aligned_p; - if (align == 0) - return *p; - aligned_p = (__m64 *)((uintptr_t)p & ~7); - return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align); -#else - struct __una_u64 { __m64 x __attribute__((packed)); }; - const struct __una_u64 *ptr = (const struct __una_u64 *) p; - return (__m64) ptr->x; -#endif -} - -static force_inline uint32_t ldl_u(const uint32_t *p) -{ -#ifdef USE_X86_MMX - /* x86's alignment restrictions are very relaxed. */ - uint32_t r; - memcpy(&r, p, sizeof(uint32_t)); - return r; -#else - struct __una_u32 { uint32_t x __attribute__((packed)); }; - const struct __una_u32 *ptr = (const struct __una_u32 *) p; - return ptr->x; -#endif -} - -static force_inline __m64 -load (const uint32_t *v) -{ -#ifdef USE_LOONGSON_MMI - __m64 ret; - asm ("lwc1 %0, %1\n\t" - : "=f" (ret) - : "m" (*v) - ); - return ret; -#else - return _mm_cvtsi32_si64 (*v); -#endif -} - -static force_inline __m64 -load8888 (const uint32_t *v) -{ -#ifdef USE_LOONGSON_MMI - return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ()); -#else - return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ()); -#endif -} - -static force_inline __m64 -load8888u (const uint32_t *v) -{ - uint32_t l = ldl_u (v); - return load8888 (&l); -} - -static force_inline __m64 -pack8888 (__m64 lo, __m64 hi) -{ - return _mm_packs_pu16 (lo, hi); -} - -static force_inline void -store (uint32_t *dest, __m64 v) -{ -#ifdef USE_LOONGSON_MMI - asm ("swc1 %1, %0\n\t" - : "=m" (*dest) - : "f" (v) - : "memory" - ); -#else - *dest = _mm_cvtsi64_si32 (v); -#endif -} - -static force_inline void -store8888 (uint32_t *dest, __m64 v) -{ - v = pack8888 (v, _mm_setzero_si64 ()); - store (dest, v); -} - -static force_inline pixman_bool_t -is_equal (__m64 a, __m64 b) -{ -#ifdef USE_LOONGSON_MMI - /* __m64 is double, we can compare directly. */ - return a == b; -#else - return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff; -#endif -} - -static force_inline pixman_bool_t -is_opaque (__m64 v) -{ -#ifdef USE_LOONGSON_MMI - return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha)); -#else - __m64 ffs = _mm_cmpeq_pi8 (v, v); - return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40); -#endif -} - -static force_inline pixman_bool_t -is_zero (__m64 v) -{ - return is_equal (v, _mm_setzero_si64 ()); -} - -/* Expand 16 bits positioned at @pos (0-3) of a mmx register into - * - * 00RR00GG00BB - * - * --- Expanding 565 in the low word --- - * - * m = (m << (32 - 3)) | (m << (16 - 5)) | m; - * m = m & (01f0003f001f); - * m = m * (008404100840); - * m = m >> 8; - * - * Note the trick here - the top word is shifted by another nibble to - * avoid it bumping into the middle word - */ -static force_inline __m64 -expand565 (__m64 pixel, int pos) -{ - __m64 p = pixel; - __m64 t1, t2; - - /* move pixel to low 16 bit and zero the rest */ -#ifdef USE_LOONGSON_MMI - p = loongson_extract_pi16 (p, pos); -#else - p = shift (shift (p, (3 - pos) * 16), -48); -#endif - - t1 = shift (p, 36 - 11); - t2 = shift (p, 16 - 5); - - p = _mm_or_si64 (t1, p); - p = _mm_or_si64 (t2, p); - p = _mm_and_si64 (p, MC (565_rgb)); - - pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier)); - return _mm_srli_pi16 (pixel, 8); -} - -/* Expand 4 16 bit pixels in an mmx register into two mmx registers of - * - * AARRGGBBRRGGBB - */ -static force_inline void -expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha) -{ - __m64 t0, t1, alpha = _mm_setzero_si64 (); - __m64 r = _mm_and_si64 (vin, MC (expand_565_r)); - __m64 g = _mm_and_si64 (vin, MC (expand_565_g)); - __m64 b = _mm_and_si64 (vin, MC (expand_565_b)); - if (full_alpha) - alpha = _mm_cmpeq_pi32 (alpha, alpha); - - /* Replicate high bits into empty low bits. */ - r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13)); - g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9)); - b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2)); - - r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); /* 00 00 00 00 R3 R2 R1 R0 */ - g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); /* 00 00 00 00 G3 G2 G1 G0 */ - b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); /* 00 00 00 00 B3 B2 B1 B0 */ - - t1 = _mm_unpacklo_pi8 (r, alpha); /* A3 R3 A2 R2 A1 R1 A0 R0 */ - t0 = _mm_unpacklo_pi8 (b, g); /* G3 B3 G2 B2 G1 B1 G0 B0 */ - - *vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */ - *vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */ -} - -static force_inline __m64 -expand8888 (__m64 in, int pos) -{ - if (pos == 0) - return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ()); - else - return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ()); -} - -static force_inline __m64 -expandx888 (__m64 in, int pos) -{ - return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha)); -} - -static force_inline void -expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha) -{ - __m64 v0, v1; - expand_4xpacked565 (vin, &v0, &v1, full_alpha); - *vout0 = expand8888 (v0, 0); - *vout1 = expand8888 (v0, 1); - *vout2 = expand8888 (v1, 0); - *vout3 = expand8888 (v1, 1); -} - -static force_inline __m64 -pack_565 (__m64 pixel, __m64 target, int pos) -{ - __m64 p = pixel; - __m64 t = target; - __m64 r, g, b; - - r = _mm_and_si64 (p, MC (565_r)); - g = _mm_and_si64 (p, MC (565_g)); - b = _mm_and_si64 (p, MC (565_b)); - -#ifdef USE_LOONGSON_MMI - r = shift (r, -(32 - 8)); - g = shift (g, -(16 - 3)); - b = shift (b, -(0 + 3)); - - p = _mm_or_si64 (r, g); - p = _mm_or_si64 (p, b); - return loongson_insert_pi16 (t, p, pos); -#else - r = shift (r, -(32 - 8) + pos * 16); - g = shift (g, -(16 - 3) + pos * 16); - b = shift (b, -(0 + 3) + pos * 16); - - if (pos == 0) - t = _mm_and_si64 (t, MC (mask_0)); - else if (pos == 1) - t = _mm_and_si64 (t, MC (mask_1)); - else if (pos == 2) - t = _mm_and_si64 (t, MC (mask_2)); - else if (pos == 3) - t = _mm_and_si64 (t, MC (mask_3)); - - p = _mm_or_si64 (r, t); - p = _mm_or_si64 (g, p); - - return _mm_or_si64 (b, p); -#endif -} - -static force_inline __m64 -pack_4xpacked565 (__m64 a, __m64 b) -{ - __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb)); - __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb)); - - __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier)); - __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier)); - - __m64 g0 = _mm_and_si64 (a, MC (packed_565_g)); - __m64 g1 = _mm_and_si64 (b, MC (packed_565_g)); - - t0 = _mm_or_si64 (t0, g0); - t1 = _mm_or_si64 (t1, g1); - - t0 = shift(t0, -5); -#ifdef USE_ARM_IWMMXT - t1 = shift(t1, -5); - return _mm_packs_pu32 (t0, t1); -#else - t1 = shift(t1, -5 + 16); - return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0)); -#endif -} - -#ifndef _MSC_VER - -static force_inline __m64 -pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3) -{ - return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)); -} - -static force_inline __m64 -pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) -{ - x = pix_multiply (x, a); - y = pix_multiply (y, b); - - return pix_add (x, y); -} - -#else - -/* MSVC only handles a "pass by register" of up to three SSE intrinsics */ - -#define pack_4x565(v0, v1, v2, v3) \ - pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)) - -#define pix_add_mul(x, a, y, b) \ - ( x = pix_multiply (x, a), \ - y = pix_multiply (y, b), \ - pix_add (x, y) ) - -#endif - -/* --------------- MMX code patch for fbcompose.c --------------------- */ - -static force_inline __m64 -combine (const uint32_t *src, const uint32_t *mask) -{ - __m64 vsrc = load8888 (src); - - if (mask) - { - __m64 m = load8888 (mask); - - m = expand_alpha (m); - vsrc = pix_multiply (vsrc, m); - } - - return vsrc; -} - -static force_inline __m64 -core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst) -{ - vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ()); - - if (is_opaque (vsrc)) - { - return vsrc; - } - else if (!is_zero (vsrc)) - { - return over (vsrc, expand_alpha (vsrc), - _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ())); - } - - return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()); -} - -static void -mmx_combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 vsrc = combine (src, mask); - - if (is_opaque (vsrc)) - { - store8888 (dest, vsrc); - } - else if (!is_zero (vsrc)) - { - __m64 sa = expand_alpha (vsrc); - store8888 (dest, over (vsrc, sa, load8888 (dest))); - } - - ++dest; - ++src; - if (mask) - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 d, da; - __m64 s = combine (src, mask); - - d = load8888 (dest); - da = expand_alpha (d); - store8888 (dest, over (d, da, s)); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 a; - __m64 x = combine (src, mask); - - a = load8888 (dest); - a = expand_alpha (a); - x = pix_multiply (x, a); - - store8888 (dest, x); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 a = combine (src, mask); - __m64 x; - - x = load8888 (dest); - a = expand_alpha (a); - x = pix_multiply (x, a); - store8888 (dest, x); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 a; - __m64 x = combine (src, mask); - - a = load8888 (dest); - a = expand_alpha (a); - a = negate (a); - x = pix_multiply (x, a); - store8888 (dest, x); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 a = combine (src, mask); - __m64 x; - - x = load8888 (dest); - a = expand_alpha (a); - a = negate (a); - x = pix_multiply (x, a); - - store8888 (dest, x); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 da, d, sia; - __m64 s = combine (src, mask); - - d = load8888 (dest); - sia = expand_alpha (s); - sia = negate (sia); - da = expand_alpha (d); - s = pix_add_mul (s, da, d, sia); - store8888 (dest, s); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end; - - end = dest + width; - - while (dest < end) - { - __m64 dia, d, sa; - __m64 s = combine (src, mask); - - d = load8888 (dest); - sa = expand_alpha (s); - dia = expand_alpha (d); - dia = negate (dia); - s = pix_add_mul (s, dia, d, sa); - store8888 (dest, s); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 dia, d, sia; - __m64 s = combine (src, mask); - - d = load8888 (dest); - sia = expand_alpha (s); - dia = expand_alpha (d); - sia = negate (sia); - dia = negate (dia); - s = pix_add_mul (s, dia, d, sia); - store8888 (dest, s); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - __m64 d; - __m64 s = combine (src, mask); - - d = load8888 (dest); - s = pix_add (s, d); - store8888 (dest, s); - - ++dest; - ++src; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = dest + width; - - while (dest < end) - { - uint32_t s, sa, da; - uint32_t d = *dest; - __m64 ms = combine (src, mask); - __m64 md = load8888 (dest); - - store8888(&s, ms); - da = ~d >> 24; - sa = s >> 24; - - if (sa > da) - { - uint32_t quot = DIV_UN8 (da, sa) << 24; - __m64 msa = load8888 ("); - msa = expand_alpha (msa); - ms = pix_multiply (ms, msa); - } - - md = pix_add (md, ms); - store8888 (dest, md); - - ++src; - ++dest; - if (mask) - mask++; - } - _mm_empty (); -} - -static void -mmx_combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - - s = pix_multiply (s, a); - store8888 (dest, s); - - ++src; - ++mask; - ++dest; - } - _mm_empty (); -} - -static void -mmx_combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 sa = expand_alpha (s); - - store8888 (dest, in_over (s, sa, a, d)); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - - store8888 (dest, over (d, da, in (s, a))); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - - s = pix_multiply (s, a); - s = pix_multiply (s, da); - store8888 (dest, s); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 sa = expand_alpha (s); - - a = pix_multiply (a, sa); - d = pix_multiply (d, a); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - - da = negate (da); - s = pix_multiply (s, a); - s = pix_multiply (s, da); - store8888 (dest, s); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 sa = expand_alpha (s); - - a = pix_multiply (a, sa); - a = negate (a); - d = pix_multiply (d, a); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - __m64 sa = expand_alpha (s); - - s = pix_multiply (s, a); - a = pix_multiply (a, sa); - a = negate (a); - d = pix_add_mul (d, a, s, da); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - __m64 sa = expand_alpha (s); - - s = pix_multiply (s, a); - a = pix_multiply (a, sa); - da = negate (da); - d = pix_add_mul (d, a, s, da); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - __m64 da = expand_alpha (d); - __m64 sa = expand_alpha (s); - - s = pix_multiply (s, a); - a = pix_multiply (a, sa); - da = negate (da); - a = negate (a); - d = pix_add_mul (d, a, s, da); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -static void -mmx_combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - const uint32_t *end = src + width; - - while (src < end) - { - __m64 a = load8888 (mask); - __m64 s = load8888 (src); - __m64 d = load8888 (dest); - - s = pix_multiply (s, a); - d = pix_add (s, d); - store8888 (dest, d); - - ++src; - ++dest; - ++mask; - } - _mm_empty (); -} - -/* ------------- MMX code paths called from fbpict.c -------------------- */ - -static void -mmx_composite_over_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, *dst; - int32_t w; - int dst_stride; - __m64 vsrc, vsrca; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - store8888 (dst, over (vsrc, vsrca, load8888 (dst))); - - w--; - dst++; - } - - while (w >= 2) - { - __m64 vdest; - __m64 dest0, dest1; - - vdest = *(__m64 *)dst; - - dest0 = over (vsrc, vsrca, expand8888 (vdest, 0)); - dest1 = over (vsrc, vsrca, expand8888 (vdest, 1)); - - *(__m64 *)dst = pack8888 (dest0, dest1); - - dst += 2; - w -= 2; - } - - CHECKPOINT (); - - if (w) - { - store8888 (dst, over (vsrc, vsrca, load8888 (dst))); - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_n_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint16_t *dst_line, *dst; - int32_t w; - int dst_stride; - __m64 vsrc, vsrca; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); - *dst = to_uint64 (vdest); - - w--; - dst++; - } - - while (w >= 4) - { - __m64 vdest = *(__m64 *)dst; - __m64 v0, v1, v2, v3; - - expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - - v0 = over (vsrc, vsrca, v0); - v1 = over (vsrc, vsrca, v1); - v2 = over (vsrc, vsrca, v2); - v3 = over (vsrc, vsrca, v3); - - *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); - - dst += 4; - w -= 4; - } - - CHECKPOINT (); - - while (w) - { - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); - *dst = to_uint64 (vdest); - - w--; - dst++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line; - uint32_t *mask_line; - int dst_stride, mask_stride; - __m64 vsrc, vsrca; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - int twidth = width; - uint32_t *p = (uint32_t *)mask_line; - uint32_t *q = (uint32_t *)dst_line; - - while (twidth && (uintptr_t)q & 7) - { - uint32_t m = *(uint32_t *)p; - - if (m) - { - __m64 vdest = load8888 (q); - vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); - store8888 (q, vdest); - } - - twidth--; - p++; - q++; - } - - while (twidth >= 2) - { - uint32_t m0, m1; - m0 = *p; - m1 = *(p + 1); - - if (m0 | m1) - { - __m64 dest0, dest1; - __m64 vdest = *(__m64 *)q; - - dest0 = in_over (vsrc, vsrca, load8888 (&m0), - expand8888 (vdest, 0)); - dest1 = in_over (vsrc, vsrca, load8888 (&m1), - expand8888 (vdest, 1)); - - *(__m64 *)q = pack8888 (dest0, dest1); - } - - p += 2; - q += 2; - twidth -= 2; - } - - if (twidth) - { - uint32_t m = *(uint32_t *)p; - - if (m) - { - __m64 vdest = load8888 (q); - vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); - store8888 (q, vdest); - } - - twidth--; - p++; - q++; - } - - dst_line += dst_stride; - mask_line += mask_stride; - } - - _mm_empty (); -} - -static void -mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - uint32_t mask; - __m64 vmask; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); - vmask = expand_alpha (load8888 (&mask)); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - __m64 s = load8888 (src); - __m64 d = load8888 (dst); - - store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); - - w--; - dst++; - src++; - } - - while (w >= 2) - { - __m64 vs = ldq_u ((__m64 *)src); - __m64 vd = *(__m64 *)dst; - __m64 vsrc0 = expand8888 (vs, 0); - __m64 vsrc1 = expand8888 (vs, 1); - - *(__m64 *)dst = pack8888 ( - in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), - in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); - - w -= 2; - dst += 2; - src += 2; - } - - if (w) - { - __m64 s = load8888 (src); - __m64 d = load8888 (dst); - - store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - uint32_t mask; - __m64 vmask; - int dst_stride, src_stride; - int32_t w; - __m64 srca; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); - - vmask = expand_alpha (load8888 (&mask)); - srca = MC (4x00ff); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - uint32_t ssrc = *src | 0xff000000; - __m64 s = load8888 (&ssrc); - __m64 d = load8888 (dst); - - store8888 (dst, in_over (s, srca, vmask, d)); - - w--; - dst++; - src++; - } - - while (w >= 16) - { - __m64 vd0 = *(__m64 *)(dst + 0); - __m64 vd1 = *(__m64 *)(dst + 2); - __m64 vd2 = *(__m64 *)(dst + 4); - __m64 vd3 = *(__m64 *)(dst + 6); - __m64 vd4 = *(__m64 *)(dst + 8); - __m64 vd5 = *(__m64 *)(dst + 10); - __m64 vd6 = *(__m64 *)(dst + 12); - __m64 vd7 = *(__m64 *)(dst + 14); - - __m64 vs0 = ldq_u ((__m64 *)(src + 0)); - __m64 vs1 = ldq_u ((__m64 *)(src + 2)); - __m64 vs2 = ldq_u ((__m64 *)(src + 4)); - __m64 vs3 = ldq_u ((__m64 *)(src + 6)); - __m64 vs4 = ldq_u ((__m64 *)(src + 8)); - __m64 vs5 = ldq_u ((__m64 *)(src + 10)); - __m64 vs6 = ldq_u ((__m64 *)(src + 12)); - __m64 vs7 = ldq_u ((__m64 *)(src + 14)); - - vd0 = pack8888 ( - in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), - in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); - - vd1 = pack8888 ( - in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), - in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); - - vd2 = pack8888 ( - in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), - in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); - - vd3 = pack8888 ( - in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), - in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); - - vd4 = pack8888 ( - in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), - in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); - - vd5 = pack8888 ( - in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), - in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); - - vd6 = pack8888 ( - in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), - in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); - - vd7 = pack8888 ( - in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), - in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); - - *(__m64 *)(dst + 0) = vd0; - *(__m64 *)(dst + 2) = vd1; - *(__m64 *)(dst + 4) = vd2; - *(__m64 *)(dst + 6) = vd3; - *(__m64 *)(dst + 8) = vd4; - *(__m64 *)(dst + 10) = vd5; - *(__m64 *)(dst + 12) = vd6; - *(__m64 *)(dst + 14) = vd7; - - w -= 16; - dst += 16; - src += 16; - } - - while (w) - { - uint32_t ssrc = *src | 0xff000000; - __m64 s = load8888 (&ssrc); - __m64 d = load8888 (dst); - - store8888 (dst, in_over (s, srca, vmask, d)); - - w--; - dst++; - src++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - uint32_t s; - int dst_stride, src_stride; - uint8_t a; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - - if (a == 0xff) - { - *dst = s; - } - else if (s) - { - __m64 ms, sa; - ms = load8888 (&s); - sa = expand_alpha (ms); - store8888 (dst, over (ms, sa, load8888 (dst))); - } - - dst++; - } - } - _mm_empty (); -} - -static void -mmx_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - -#if 0 - /* FIXME */ - assert (src_image->drawable == mask_image->drawable); -#endif - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - __m64 vsrc = load8888 (src); - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 ( - over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); - - *dst = to_uint64 (vdest); - - w--; - dst++; - src++; - } - - CHECKPOINT (); - - while (w >= 4) - { - __m64 vdest = *(__m64 *)dst; - __m64 v0, v1, v2, v3; - __m64 vsrc0, vsrc1, vsrc2, vsrc3; - - expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - - vsrc0 = load8888 ((src + 0)); - vsrc1 = load8888 ((src + 1)); - vsrc2 = load8888 ((src + 2)); - vsrc3 = load8888 ((src + 3)); - - v0 = over (vsrc0, expand_alpha (vsrc0), v0); - v1 = over (vsrc1, expand_alpha (vsrc1), v1); - v2 = over (vsrc2, expand_alpha (vsrc2), v2); - v3 = over (vsrc3, expand_alpha (vsrc3), v3); - - *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); - - w -= 4; - dst += 4; - src += 4; - } - - CHECKPOINT (); - - while (w) - { - __m64 vsrc = load8888 (src); - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); - - *dst = to_uint64 (vdest); - - w--; - dst++; - src++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - __m64 vsrc, vsrca; - uint64_t srcsrc; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - srcsrc = (uint64_t)src << 32 | src; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - uint64_t m = *mask; - - if (m) - { - __m64 vdest = in_over (vsrc, vsrca, - expand_alpha_rev (to_m64 (m)), - load8888 (dst)); - - store8888 (dst, vdest); - } - - w--; - mask++; - dst++; - } - - CHECKPOINT (); - - while (w >= 2) - { - uint64_t m0, m1; - - m0 = *mask; - m1 = *(mask + 1); - - if (srca == 0xff && (m0 & m1) == 0xff) - { - *(uint64_t *)dst = srcsrc; - } - else if (m0 | m1) - { - __m64 vdest; - __m64 dest0, dest1; - - vdest = *(__m64 *)dst; - - dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)), - expand8888 (vdest, 0)); - dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)), - expand8888 (vdest, 1)); - - *(__m64 *)dst = pack8888 (dest0, dest1); - } - - mask += 2; - dst += 2; - w -= 2; - } - - CHECKPOINT (); - - if (w) - { - uint64_t m = *mask; - - if (m) - { - __m64 vdest = load8888 (dst); - - vdest = in_over ( - vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest); - store8888 (dst, vdest); - } - } - } - - _mm_empty (); -} - -static pixman_bool_t -mmx_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - uint64_t fill; - __m64 vfill; - uint32_t byte_width; - uint8_t *byte_line; - -#if defined __GNUC__ && defined USE_X86_MMX - __m64 v1, v2, v3, v4, v5, v6, v7; -#endif - - if (bpp != 16 && bpp != 32 && bpp != 8) - return FALSE; - - if (bpp == 8) - { - stride = stride * (int) sizeof (uint32_t) / 1; - byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); - byte_width = width; - stride *= 1; - filler = (filler & 0xff) * 0x01010101; - } - else if (bpp == 16) - { - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); - byte_width = 2 * width; - stride *= 2; - filler = (filler & 0xffff) * 0x00010001; - } - else - { - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); - byte_width = 4 * width; - stride *= 4; - } - - fill = ((uint64_t)filler << 32) | filler; - vfill = to_m64 (fill); - -#if defined __GNUC__ && defined USE_X86_MMX - __asm__ ( - "movq %7, %0\n" - "movq %7, %1\n" - "movq %7, %2\n" - "movq %7, %3\n" - "movq %7, %4\n" - "movq %7, %5\n" - "movq %7, %6\n" - : "=&y" (v1), "=&y" (v2), "=&y" (v3), - "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7) - : "y" (vfill)); -#endif - - while (height--) - { - int w; - uint8_t *d = byte_line; - - byte_line += stride; - w = byte_width; - - if (w >= 1 && ((uintptr_t)d & 1)) - { - *(uint8_t *)d = (filler & 0xff); - w--; - d++; - } - - if (w >= 2 && ((uintptr_t)d & 3)) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - - while (w >= 4 && ((uintptr_t)d & 7)) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - - while (w >= 64) - { -#if defined __GNUC__ && defined USE_X86_MMX - __asm__ ( - "movq %1, (%0)\n" - "movq %2, 8(%0)\n" - "movq %3, 16(%0)\n" - "movq %4, 24(%0)\n" - "movq %5, 32(%0)\n" - "movq %6, 40(%0)\n" - "movq %7, 48(%0)\n" - "movq %8, 56(%0)\n" - : - : "r" (d), - "y" (vfill), "y" (v1), "y" (v2), "y" (v3), - "y" (v4), "y" (v5), "y" (v6), "y" (v7) - : "memory"); -#else - *(__m64*) (d + 0) = vfill; - *(__m64*) (d + 8) = vfill; - *(__m64*) (d + 16) = vfill; - *(__m64*) (d + 24) = vfill; - *(__m64*) (d + 32) = vfill; - *(__m64*) (d + 40) = vfill; - *(__m64*) (d + 48) = vfill; - *(__m64*) (d + 56) = vfill; -#endif - w -= 64; - d += 64; - } - - while (w >= 4) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - if (w >= 2) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - if (w >= 1) - { - *(uint8_t *)d = (filler & 0xff); - w--; - d++; - } - - } - - _mm_empty (); - return TRUE; -} - -static void -mmx_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - w--; - } - - while (w >= 4) - { - __m64 vdest; - __m64 vsrc0 = ldq_u ((__m64 *)(src + 0)); - __m64 vsrc1 = ldq_u ((__m64 *)(src + 2)); - - vdest = pack_4xpacked565 (vsrc0, vsrc1); - - *(__m64 *)dst = vdest; - - w -= 4; - src += 4; - dst += 4; - } - - while (w) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - w--; - } - } - - _mm_empty (); -} - -static void -mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - __m64 vsrc; - uint64_t srcsrc; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - { - mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dest_image->bits.format), - dest_x, dest_y, width, height, 0); - return; - } - - srcsrc = (uint64_t)src << 32 | src; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - vsrc = load8888 (&src); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - uint64_t m = *mask; - - if (m) - { - __m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - - store8888 (dst, vdest); - } - else - { - *dst = 0; - } - - w--; - mask++; - dst++; - } - - CHECKPOINT (); - - while (w >= 2) - { - uint64_t m0, m1; - m0 = *mask; - m1 = *(mask + 1); - - if (srca == 0xff && (m0 & m1) == 0xff) - { - *(uint64_t *)dst = srcsrc; - } - else if (m0 | m1) - { - __m64 dest0, dest1; - - dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0))); - dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1))); - - *(__m64 *)dst = pack8888 (dest0, dest1); - } - else - { - *(uint64_t *)dst = 0; - } - - mask += 2; - dst += 2; - w -= 2; - } - - CHECKPOINT (); - - if (w) - { - uint64_t m = *mask; - - if (m) - { - __m64 vdest = load8888 (dst); - - vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - store8888 (dst, vdest); - } - else - { - *dst = 0; - } - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint16_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - __m64 vsrc, vsrca, tmp; - __m64 srcsrcsrcsrc; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); - srcsrcsrcsrc = expand_alpha_rev (tmp); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - uint64_t m = *mask; - - if (m) - { - uint64_t d = *dst; - __m64 vd = to_m64 (d); - __m64 vdest = in_over ( - vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0)); - - vd = pack_565 (vdest, _mm_setzero_si64 (), 0); - *dst = to_uint64 (vd); - } - - w--; - mask++; - dst++; - } - - CHECKPOINT (); - - while (w >= 4) - { - uint64_t m0, m1, m2, m3; - m0 = *mask; - m1 = *(mask + 1); - m2 = *(mask + 2); - m3 = *(mask + 3); - - if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) - { - *(__m64 *)dst = srcsrcsrcsrc; - } - else if (m0 | m1 | m2 | m3) - { - __m64 vdest = *(__m64 *)dst; - __m64 v0, v1, v2, v3; - __m64 vm0, vm1, vm2, vm3; - - expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - - vm0 = to_m64 (m0); - v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0); - - vm1 = to_m64 (m1); - v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1); - - vm2 = to_m64 (m2); - v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2); - - vm3 = to_m64 (m3); - v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3); - - *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);; - } - - w -= 4; - mask += 4; - dst += 4; - } - - CHECKPOINT (); - - while (w) - { - uint64_t m = *mask; - - if (m) - { - uint64_t d = *dst; - __m64 vd = to_m64 (d); - __m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)), - expand565 (vd, 0)); - vd = pack_565 (vdest, _mm_setzero_si64 (), 0); - *dst = to_uint64 (vd); - } - - w--; - mask++; - dst++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - -#if 0 - /* FIXME */ - assert (src_image->drawable == mask_image->drawable); -#endif - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - __m64 vsrc = load8888 (src); - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); - - *dst = to_uint64 (vdest); - - w--; - dst++; - src++; - } - - CHECKPOINT (); - - while (w >= 4) - { - uint32_t s0, s1, s2, s3; - unsigned char a0, a1, a2, a3; - - s0 = *src; - s1 = *(src + 1); - s2 = *(src + 2); - s3 = *(src + 3); - - a0 = (s0 >> 24); - a1 = (s1 >> 24); - a2 = (s2 >> 24); - a3 = (s3 >> 24); - - if ((a0 & a1 & a2 & a3) == 0xFF) - { - __m64 v0 = invert_colors (load8888 (&s0)); - __m64 v1 = invert_colors (load8888 (&s1)); - __m64 v2 = invert_colors (load8888 (&s2)); - __m64 v3 = invert_colors (load8888 (&s3)); - - *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); - } - else if (s0 | s1 | s2 | s3) - { - __m64 vdest = *(__m64 *)dst; - __m64 v0, v1, v2, v3; - - __m64 vsrc0 = load8888 (&s0); - __m64 vsrc1 = load8888 (&s1); - __m64 vsrc2 = load8888 (&s2); - __m64 vsrc3 = load8888 (&s3); - - expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - - v0 = over_rev_non_pre (vsrc0, v0); - v1 = over_rev_non_pre (vsrc1, v1); - v2 = over_rev_non_pre (vsrc2, v2); - v3 = over_rev_non_pre (vsrc3, v3); - - *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); - } - - w -= 4; - dst += 4; - src += 4; - } - - CHECKPOINT (); - - while (w) - { - __m64 vsrc = load8888 (src); - uint64_t d = *dst; - __m64 vdest = expand565 (to_m64 (d), 0); - - vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); - - *dst = to_uint64 (vdest); - - w--; - dst++; - src++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - -#if 0 - /* FIXME */ - assert (src_image->drawable == mask_image->drawable); -#endif - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - __m64 s = load8888 (src); - __m64 d = load8888 (dst); - - store8888 (dst, over_rev_non_pre (s, d)); - - w--; - dst++; - src++; - } - - while (w >= 2) - { - uint32_t s0, s1; - unsigned char a0, a1; - __m64 d0, d1; - - s0 = *src; - s1 = *(src + 1); - - a0 = (s0 >> 24); - a1 = (s1 >> 24); - - if ((a0 & a1) == 0xFF) - { - d0 = invert_colors (load8888 (&s0)); - d1 = invert_colors (load8888 (&s1)); - - *(__m64 *)dst = pack8888 (d0, d1); - } - else if (s0 | s1) - { - __m64 vdest = *(__m64 *)dst; - - d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0)); - d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1)); - - *(__m64 *)dst = pack8888 (d0, d1); - } - - w -= 2; - dst += 2; - src += 2; - } - - if (w) - { - __m64 s = load8888 (src); - __m64 d = load8888 (dst); - - store8888 (dst, over_rev_non_pre (s, d)); - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint16_t *dst_line; - uint32_t *mask_line; - int dst_stride, mask_stride; - __m64 vsrc, vsrca; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - int twidth = width; - uint32_t *p = (uint32_t *)mask_line; - uint16_t *q = (uint16_t *)dst_line; - - while (twidth && ((uintptr_t)q & 7)) - { - uint32_t m = *(uint32_t *)p; - - if (m) - { - uint64_t d = *q; - __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); - *q = to_uint64 (vdest); - } - - twidth--; - p++; - q++; - } - - while (twidth >= 4) - { - uint32_t m0, m1, m2, m3; - - m0 = *p; - m1 = *(p + 1); - m2 = *(p + 2); - m3 = *(p + 3); - - if ((m0 | m1 | m2 | m3)) - { - __m64 vdest = *(__m64 *)q; - __m64 v0, v1, v2, v3; - - expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - - v0 = in_over (vsrc, vsrca, load8888 (&m0), v0); - v1 = in_over (vsrc, vsrca, load8888 (&m1), v1); - v2 = in_over (vsrc, vsrca, load8888 (&m2), v2); - v3 = in_over (vsrc, vsrca, load8888 (&m3), v3); - - *(__m64 *)q = pack_4x565 (v0, v1, v2, v3); - } - twidth -= 4; - p += 4; - q += 4; - } - - while (twidth) - { - uint32_t m; - - m = *(uint32_t *)p; - if (m) - { - uint64_t d = *q; - __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); - *q = to_uint64 (vdest); - } - - twidth--; - p++; - q++; - } - - mask_line += mask_stride; - dst_line += dst_stride; - } - - _mm_empty (); -} - -static void -mmx_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - uint8_t sa; - __m64 vsrc, vsrca; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - sa = src >> 24; - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - uint16_t tmp; - uint8_t a; - uint32_t m, d; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - d = MUL_UN8 (m, d, tmp); - - *dst++ = d; - w--; - } - - while (w >= 4) - { - __m64 vmask; - __m64 vdest; - - vmask = load8888u ((uint32_t *)mask); - vdest = load8888 ((uint32_t *)dst); - - store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest)); - - dst += 4; - mask += 4; - w -= 4; - } - - while (w--) - { - uint16_t tmp; - uint8_t a; - uint32_t m, d; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - d = MUL_UN8 (m, d, tmp); - - *dst++ = d; - } - } - - _mm_empty (); -} - -static void -mmx_composite_in_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int src_stride, dst_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 3) - { - uint8_t s, d; - uint16_t tmp; - - s = *src; - d = *dst; - - *dst = MUL_UN8 (s, d, tmp); - - src++; - dst++; - w--; - } - - while (w >= 4) - { - uint32_t *s = (uint32_t *)src; - uint32_t *d = (uint32_t *)dst; - - store8888 (d, in (load8888u (s), load8888 (d))); - - w -= 4; - dst += 4; - src += 4; - } - - while (w--) - { - uint8_t s, d; - uint16_t tmp; - - s = *src; - d = *dst; - - *dst = MUL_UN8 (s, d, tmp); - - src++; - dst++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - uint8_t sa; - __m64 vsrc, vsrca; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - sa = src >> 24; - - if (src == 0) - return; - - vsrc = load8888 (&src); - vsrca = expand_alpha (vsrc); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 3) - { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - r = ADD_UN8 (m, d, tmp); - - *dst++ = r; - w--; - } - - while (w >= 4) - { - __m64 vmask; - __m64 vdest; - - vmask = load8888u ((uint32_t *)mask); - vdest = load8888 ((uint32_t *)dst); - - store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest)); - - dst += 4; - mask += 4; - w -= 4; - } - - while (w--) - { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - r = ADD_UN8 (m, d, tmp); - - *dst++ = r; - } - } - - _mm_empty (); -} - -static void -mmx_composite_add_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint8_t s, d; - uint16_t t; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - s = *src; - d = *dst; - t = d + s; - s = t | (0 - (t >> 8)); - *dst = s; - - dst++; - src++; - w--; - } - - while (w >= 8) - { - *(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); - dst += 8; - src += 8; - w -= 8; - } - - while (w) - { - s = *src; - d = *dst; - t = d + s; - s = t | (0 - (t >> 8)); - *dst = s; - - dst++; - src++; - w--; - } - } - - _mm_empty (); -} - -static void -mmx_composite_add_0565_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t d; - uint16_t *src_line, *src; - uint32_t s; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - s = *src++; - if (s) - { - d = *dst; - s = convert_0565_to_8888 (s); - if (d) - { - d = convert_0565_to_8888 (d); - UN8x4_ADD_UN8x4 (s, d); - } - *dst = convert_8888_to_0565 (s); - } - dst++; - w--; - } - - while (w >= 4) - { - __m64 vdest = *(__m64 *)dst; - __m64 vsrc = ldq_u ((__m64 *)src); - __m64 vd0, vd1; - __m64 vs0, vs1; - - expand_4xpacked565 (vdest, &vd0, &vd1, 0); - expand_4xpacked565 (vsrc, &vs0, &vs1, 0); - - vd0 = _mm_adds_pu8 (vd0, vs0); - vd1 = _mm_adds_pu8 (vd1, vs1); - - *(__m64 *)dst = pack_4xpacked565 (vd0, vd1); - - dst += 4; - src += 4; - w -= 4; - } - - while (w--) - { - s = *src++; - if (s) - { - d = *dst; - s = convert_0565_to_8888 (s); - if (d) - { - d = convert_0565_to_8888 (d); - UN8x4_ADD_UN8x4 (s, d); - } - *dst = convert_8888_to_0565 (s); - } - dst++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - CHECKPOINT (); - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 7) - { - store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), - load ((const uint32_t *)dst))); - dst++; - src++; - w--; - } - - while (w >= 2) - { - *(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); - dst += 2; - src += 2; - w -= 2; - } - - if (w) - { - store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), - load ((const uint32_t *)dst))); - - } - } - - _mm_empty (); -} - -static pixman_bool_t -mmx_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - uint8_t * src_bytes; - uint8_t * dst_bytes; - int byte_width; - - if (src_bpp != dst_bpp) - return FALSE; - - if (src_bpp == 16) - { - src_stride = src_stride * (int) sizeof (uint32_t) / 2; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; - src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); - byte_width = 2 * width; - src_stride *= 2; - dst_stride *= 2; - } - else if (src_bpp == 32) - { - src_stride = src_stride * (int) sizeof (uint32_t) / 4; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); - byte_width = 4 * width; - src_stride *= 4; - dst_stride *= 4; - } - else - { - return FALSE; - } - - while (height--) - { - int w; - uint8_t *s = src_bytes; - uint8_t *d = dst_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - w = byte_width; - - if (w >= 1 && ((uintptr_t)d & 1)) - { - *(uint8_t *)d = *(uint8_t *)s; - w -= 1; - s += 1; - d += 1; - } - - if (w >= 2 && ((uintptr_t)d & 3)) - { - *(uint16_t *)d = *(uint16_t *)s; - w -= 2; - s += 2; - d += 2; - } - - while (w >= 4 && ((uintptr_t)d & 7)) - { - *(uint32_t *)d = ldl_u ((uint32_t *)s); - - w -= 4; - s += 4; - d += 4; - } - - while (w >= 64) - { -#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX - __asm__ ( - "movq (%1), %%mm0\n" - "movq 8(%1), %%mm1\n" - "movq 16(%1), %%mm2\n" - "movq 24(%1), %%mm3\n" - "movq 32(%1), %%mm4\n" - "movq 40(%1), %%mm5\n" - "movq 48(%1), %%mm6\n" - "movq 56(%1), %%mm7\n" - - "movq %%mm0, (%0)\n" - "movq %%mm1, 8(%0)\n" - "movq %%mm2, 16(%0)\n" - "movq %%mm3, 24(%0)\n" - "movq %%mm4, 32(%0)\n" - "movq %%mm5, 40(%0)\n" - "movq %%mm6, 48(%0)\n" - "movq %%mm7, 56(%0)\n" - : - : "r" (d), "r" (s) - : "memory", - "%mm0", "%mm1", "%mm2", "%mm3", - "%mm4", "%mm5", "%mm6", "%mm7"); -#else - __m64 v0 = ldq_u ((__m64 *)(s + 0)); - __m64 v1 = ldq_u ((__m64 *)(s + 8)); - __m64 v2 = ldq_u ((__m64 *)(s + 16)); - __m64 v3 = ldq_u ((__m64 *)(s + 24)); - __m64 v4 = ldq_u ((__m64 *)(s + 32)); - __m64 v5 = ldq_u ((__m64 *)(s + 40)); - __m64 v6 = ldq_u ((__m64 *)(s + 48)); - __m64 v7 = ldq_u ((__m64 *)(s + 56)); - *(__m64 *)(d + 0) = v0; - *(__m64 *)(d + 8) = v1; - *(__m64 *)(d + 16) = v2; - *(__m64 *)(d + 24) = v3; - *(__m64 *)(d + 32) = v4; - *(__m64 *)(d + 40) = v5; - *(__m64 *)(d + 48) = v6; - *(__m64 *)(d + 56) = v7; -#endif - - w -= 64; - s += 64; - d += 64; - } - while (w >= 4) - { - *(uint32_t *)d = ldl_u ((uint32_t *)s); - - w -= 4; - s += 4; - d += 4; - } - if (w >= 2) - { - *(uint16_t *)d = *(uint16_t *)s; - w -= 2; - s += 2; - d += 2; - } - } - - _mm_empty (); - - return TRUE; -} - -static void -mmx_composite_copy_area (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - - mmx_blt (imp, src_image->bits.bits, - dest_image->bits.bits, - src_image->bits.rowstride, - dest_image->bits.rowstride, - PIXMAN_FORMAT_BPP (src_image->bits.format), - PIXMAN_FORMAT_BPP (dest_image->bits.format), - src_x, src_y, dest_x, dest_y, width, height); -} - -static void -mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *src, *src_line; - uint32_t *dst, *dst_line; - uint8_t *mask, *mask_line; - int src_stride, mask_stride, dst_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - - while (w--) - { - uint64_t m = *mask; - - if (m) - { - uint32_t ssrc = *src | 0xff000000; - __m64 s = load8888 (&ssrc); - - if (m == 0xff) - { - store8888 (dst, s); - } - else - { - __m64 sa = expand_alpha (s); - __m64 vm = expand_alpha_rev (to_m64 (m)); - __m64 vdest = in_over (s, sa, vm, load8888 (dst)); - - store8888 (dst, vdest); - } - } - - mask++; - dst++; - src++; - } - } - - _mm_empty (); -} - -static void -mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, *dst; - int32_t w; - int dst_stride; - __m64 vsrc; - - CHECKPOINT (); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - vsrc = load8888 (&src); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - CHECKPOINT (); - - while (w && (uintptr_t)dst & 7) - { - __m64 vdest = load8888 (dst); - - store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); - - w--; - dst++; - } - - while (w >= 2) - { - __m64 vdest = *(__m64 *)dst; - __m64 dest0 = expand8888 (vdest, 0); - __m64 dest1 = expand8888 (vdest, 1); - - - dest0 = over (dest0, expand_alpha (dest0), vsrc); - dest1 = over (dest1, expand_alpha (dest1), vsrc); - - *(__m64 *)dst = pack8888 (dest0, dest1); - - dst += 2; - w -= 2; - } - - CHECKPOINT (); - - if (w) - { - __m64 vdest = load8888 (dst); - - store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); - } - } - - _mm_empty (); -} - -static force_inline void -scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t* pd, - const uint32_t* ps, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t src_width_fixed, - pixman_bool_t fully_transparent_src) -{ - if (fully_transparent_src) - return; - - while (w) - { - __m64 d = load (pd); - __m64 s = load (ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - store8888 (pd, core_combine_over_u_pixel_mmx (s, d)); - pd++; - - w--; - } - - _mm_empty (); -} - -FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER, - scaled_nearest_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, COVER) -FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER, - scaled_nearest_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, NONE) -FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER, - scaled_nearest_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, PAD) -FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER, - scaled_nearest_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, NORMAL) - -static force_inline void -scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask, - uint32_t * dst, - const uint32_t * src, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t src_width_fixed, - pixman_bool_t zero_src) -{ - __m64 mm_mask; - - if (zero_src || (*mask >> 24) == 0) - { - /* A workaround for https://gcc.gnu.org/PR47759 */ - _mm_empty (); - return; - } - - mm_mask = expand_alpha (load8888 (mask)); - - while (w) - { - uint32_t s = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - if (s) - { - __m64 ms = load8888 (&s); - __m64 alpha = expand_alpha (ms); - __m64 dest = load8888 (dst); - - store8888 (dst, (in_over (ms, alpha, mm_mask, dest))); - } - - dst++; - w--; - } - - _mm_empty (); -} - -FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER, - scaled_nearest_scanline_mmx_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER, - scaled_nearest_scanline_mmx_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER, - scaled_nearest_scanline_mmx_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER, - scaled_nearest_scanline_mmx_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) - -#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS)) -#define BMSK (BSHIFT - 1) - -#define BILINEAR_DECLARE_VARIABLES \ - const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ - const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ - const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ - const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ - const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ - const __m64 mm_zero = _mm_setzero_si64 (); \ - __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx) - -#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ -do { \ - /* fetch 2x2 pixel block into 2 mmx registers */ \ - __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \ - __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \ - /* vertical interpolation */ \ - __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \ - __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \ - __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \ - __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ - __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ - __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ - /* calculate horizontal weights */ \ - __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ - _mm_srli_pi16 (mm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS))); \ - /* horizontal interpolation */ \ - __m64 p = _mm_unpacklo_pi16 (lo, hi); \ - __m64 q = _mm_unpackhi_pi16 (lo, hi); \ - vx += unit_x; \ - lo = _mm_madd_pi16 (p, mm_wh); \ - hi = _mm_madd_pi16 (q, mm_wh); \ - mm_x = _mm_add_pi16 (mm_x, mm_ux); \ - /* shift and pack the result */ \ - hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ - lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \ - lo = _mm_packs_pi32 (lo, hi); \ - lo = _mm_packs_pu16 (lo, lo); \ - pix = lo; \ -} while (0) - -#define BILINEAR_SKIP_ONE_PIXEL() \ -do { \ - vx += unit_x; \ - mm_x = _mm_add_pi16 (mm_x, mm_ux); \ -} while(0) - -static force_inline void -scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - BILINEAR_DECLARE_VARIABLES; - __m64 pix; - - while (w--) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix); - store (dst, pix); - dst++; - } - - _mm_empty (); -} - -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC, - scaled_bilinear_scanline_mmx_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC, - scaled_bilinear_scanline_mmx_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC, - scaled_bilinear_scanline_mmx_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC, - scaled_bilinear_scanline_mmx_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - -static force_inline void -scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - BILINEAR_DECLARE_VARIABLES; - __m64 pix1, pix2; - - while (w) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - - if (!is_zero (pix1)) - { - pix2 = load (dst); - store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2)); - } - - w--; - dst++; - } - - _mm_empty (); -} - -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER, - scaled_bilinear_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER, - scaled_bilinear_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER, - scaled_bilinear_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER, - scaled_bilinear_scanline_mmx_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - -static force_inline void -scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, - const uint8_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - BILINEAR_DECLARE_VARIABLES; - __m64 pix1, pix2; - uint32_t m; - - while (w) - { - m = (uint32_t) *mask++; - - if (m) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - - if (m == 0xff && is_opaque (pix1)) - { - store (dst, pix1); - } - else - { - __m64 ms, md, ma, msa; - - pix2 = load (dst); - ma = expand_alpha_rev (to_m64 (m)); - ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); - md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); - - msa = expand_alpha (ms); - - store8888 (dst, (in_over (ms, msa, ma, md))); - } - } - else - { - BILINEAR_SKIP_ONE_PIXEL (); - } - - w--; - dst++; - } - - _mm_empty (); -} - -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, - scaled_bilinear_scanline_mmx_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - COVER, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, - scaled_bilinear_scanline_mmx_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - PAD, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, - scaled_bilinear_scanline_mmx_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - NONE, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, - scaled_bilinear_scanline_mmx_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - NORMAL, FLAG_HAVE_NON_SOLID_MASK) - -static uint32_t * -mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint32_t *src = (uint32_t *)iter->bits; - - iter->bits += iter->stride; - - while (w && ((uintptr_t)dst) & 7) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - while (w >= 8) - { - __m64 vsrc1 = ldq_u ((__m64 *)(src + 0)); - __m64 vsrc2 = ldq_u ((__m64 *)(src + 2)); - __m64 vsrc3 = ldq_u ((__m64 *)(src + 4)); - __m64 vsrc4 = ldq_u ((__m64 *)(src + 6)); - - *(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000)); - *(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000)); - *(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000)); - *(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000)); - - dst += 8; - src += 8; - w -= 8; - } - - while (w) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - _mm_empty (); - return iter->buffer; -} - -static uint32_t * -mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint16_t *src = (uint16_t *)iter->bits; - - iter->bits += iter->stride; - - while (w && ((uintptr_t)dst) & 0x0f) - { - uint16_t s = *src++; - - *dst++ = convert_0565_to_8888 (s); - w--; - } - - while (w >= 4) - { - __m64 vsrc = ldq_u ((__m64 *)src); - __m64 mm0, mm1; - - expand_4xpacked565 (vsrc, &mm0, &mm1, 1); - - *(__m64 *)(dst + 0) = mm0; - *(__m64 *)(dst + 2) = mm1; - - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - uint16_t s = *src++; - - *dst++ = convert_0565_to_8888 (s); - w--; - } - - _mm_empty (); - return iter->buffer; -} - -static uint32_t * -mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint8_t *src = iter->bits; - - iter->bits += iter->stride; - - while (w && (((uintptr_t)dst) & 15)) - { - *dst++ = (uint32_t)*(src++) << 24; - w--; - } - - while (w >= 8) - { - __m64 mm0 = ldq_u ((__m64 *)src); - - __m64 mm1 = _mm_unpacklo_pi8 (_mm_setzero_si64(), mm0); - __m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0); - __m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1); - __m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1); - __m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2); - __m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2); - - *(__m64 *)(dst + 0) = mm3; - *(__m64 *)(dst + 2) = mm4; - *(__m64 *)(dst + 4) = mm5; - *(__m64 *)(dst + 6) = mm6; - - dst += 8; - src += 8; - w -= 8; - } - - while (w) - { - *dst++ = (uint32_t)*(src++) << 24; - w--; - } - - _mm_empty (); - return iter->buffer; -} - -#define IMAGE_FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - -static const pixman_iter_info_t mmx_iters[] = -{ - { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL - }, - { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL - }, - { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL - }, - { PIXMAN_null }, -}; - -static const pixman_fast_path_t mmx_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mmx_composite_over_n_8_0565 ), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mmx_composite_over_n_8_0565 ), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mmx_composite_over_n_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mmx_composite_over_n_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mmx_composite_over_n_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mmx_composite_over_n_8_8888 ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mmx_composite_over_n_8888_0565_ca ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mmx_composite_over_n_8888_0565_ca ), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, mmx_composite_over_pixbuf_8888 ), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, mmx_composite_over_pixbuf_8888 ), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, mmx_composite_over_pixbuf_0565 ), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, mmx_composite_over_pixbuf_8888 ), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, mmx_composite_over_pixbuf_8888 ), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, mmx_composite_over_pixbuf_0565 ), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, mmx_composite_over_x888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, mmx_composite_over_x888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, mmx_composite_over_x888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, mmx_composite_over_x888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mmx_composite_over_8888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ), - PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), - - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mmx_composite_over_8888_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mmx_composite_over_8888_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mmx_composite_over_8888_0565 ), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mmx_composite_over_8888_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ), - - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888), - - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ), - - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mmx_composite_src_n_8_8888 ), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mmx_composite_copy_area ), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mmx_composite_copy_area ), - - PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ), - PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), - - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ), - - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ), - - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_mmx (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths); - - imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u; - imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u; - imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u; - imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u; - imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u; - imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u; - imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u; - - imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca; - imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca; - imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca; - - imp->blt = mmx_blt; - imp->fill = mmx_fill; - - imp->iter_info = mmx_iters; - - return imp; -} - -#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */ diff --git a/vendor/pixman/pixman/pixman-noop.c b/vendor/pixman/pixman/pixman-noop.c deleted file mode 100644 index e43199bc1..000000000 --- a/vendor/pixman/pixman/pixman-noop.c +++ /dev/null @@ -1,161 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2011 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" - -static void -noop_composite (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - return; -} - -static uint32_t * -noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) -{ - uint32_t *result = iter->buffer; - - iter->buffer += iter->image->bits.rowstride; - - return result; -} - -static void -noop_init_solid_narrow (pixman_iter_t *iter, - const pixman_iter_info_t *info) -{ - pixman_image_t *image = iter->image; - uint32_t *buffer = iter->buffer; - uint32_t *end = buffer + iter->width; - uint32_t color; - - if (iter->image->type == SOLID) - color = image->solid.color_32; - else - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; -} - -static void -noop_init_solid_wide (pixman_iter_t *iter, - const pixman_iter_info_t *info) -{ - pixman_image_t *image = iter->image; - argb_t *buffer = (argb_t *)iter->buffer; - argb_t *end = buffer + iter->width; - argb_t color; - - if (iter->image->type == SOLID) - color = image->solid.color_float; - else - color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; -} - -static void -noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ - pixman_image_t *image = iter->image; - - iter->buffer = - image->bits.bits + iter->y * image->bits.rowstride + iter->x; -} - -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - -static const pixman_iter_info_t noop_iters[] = -{ - /* Source iters */ - { PIXMAN_any, - 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, - NULL, - _pixman_iter_get_scanline_noop, - NULL - }, - { PIXMAN_solid, - FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, - noop_init_solid_narrow, - _pixman_iter_get_scanline_noop, - NULL, - }, - { PIXMAN_solid, - FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, - noop_init_solid_wide, - _pixman_iter_get_scanline_noop, - NULL - }, - { PIXMAN_a8r8g8b8, - FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, - ITER_NARROW | ITER_SRC, - noop_init_direct_buffer, - noop_get_scanline, - NULL - }, - /* Dest iters */ - { PIXMAN_a8r8g8b8, - FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, - noop_init_direct_buffer, - _pixman_iter_get_scanline_noop, - dest_write_back_direct - }, - { PIXMAN_x8r8g8b8, - FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, - noop_init_direct_buffer, - _pixman_iter_get_scanline_noop, - dest_write_back_direct - }, - { PIXMAN_null }, -}; - -static const pixman_fast_path_t noop_fast_paths[] = -{ - { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_noop (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, noop_fast_paths); - - imp->iter_info = noop_iters; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-ppc.c b/vendor/pixman/pixman/pixman-ppc.c deleted file mode 100644 index 926eb445f..000000000 --- a/vendor/pixman/pixman/pixman-ppc.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * "-maltivec -mabi=altivec", as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. - */ -#ifdef __APPLE__ -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - int error, have_vmx; - size_t length = sizeof(have_vmx); - - error = sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); - - if (error) - return FALSE; - - return have_vmx; -} - -#elif defined (__OpenBSD__) -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - int error, have_vmx; - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - - error = sysctl (mib, 2, &have_vmx, &length, NULL, 0); - - if (error != 0) - return FALSE; - - return have_vmx; -} - -#elif defined (__FreeBSD__) -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - - unsigned long cpufeatures; - int have_vmx; - - if (elf_aux_info(AT_HWCAP, &cpufeatures, sizeof(cpufeatures))) - return FALSE; - - have_vmx = cpufeatures & PPC_FEATURE_HAS_ALTIVEC; - return have_vmx; -} - -#elif defined (__linux__) - -#include -#include -#include -#include -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - int have_vmx = FALSE; - int fd; - struct - { - unsigned long type; - unsigned long value; - } aux; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof (aux)) == sizeof (aux)) - { - if (aux.type == AT_HWCAP && (aux.value & PPC_FEATURE_HAS_ALTIVEC)) - { - have_vmx = TRUE; - break; - } - } - - close (fd); - } - - return have_vmx; -} - -#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ -#include -#include - -static jmp_buf jump_env; - -static void -vmx_test (int sig, - siginfo_t *si, - void * unused) -{ - longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ - struct sigaction sa, osa; - int jmp_result; - - sa.sa_flags = SA_SIGINFO; - sigemptyset (&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, &sa, &osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( "vor 0, 0, 0" ); - } - sigaction (SIGILL, &osa, NULL); - return (jmp_result == 0); -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - -pixman_implementation_t * -_pixman_ppc_get_implementations (pixman_implementation_t *imp) -{ -#ifdef USE_VMX - if (!_pixman_disabled ("vmx") && pixman_have_vmx ()) - imp = _pixman_implementation_create_vmx (imp); -#endif - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-private.h b/vendor/pixman/pixman/pixman-private.h deleted file mode 100644 index 7316c801b..000000000 --- a/vendor/pixman/pixman/pixman-private.h +++ /dev/null @@ -1,1193 +0,0 @@ -#ifndef PIXMAN_PRIVATE_H -#define PIXMAN_PRIVATE_H - -/* - * The defines which are shared between C and assembly code - */ - -/* bilinear interpolation precision (must be < 8) */ -#define BILINEAR_INTERPOLATION_BITS 7 -#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) - -/* - * C specific part - */ - -#ifndef __ASSEMBLER__ - -#ifndef PACKAGE -# error config.h must be included before pixman-private.h -#endif - -#define PIXMAN_DISABLE_DEPRECATED -#define PIXMAN_USE_INTERNAL_API - -#include "pixman.h" -#include -#include -#include -#include -#include -#include - -#include "pixman-compiler.h" - -/* - * Images - */ -typedef struct image_common image_common_t; -typedef struct solid_fill solid_fill_t; -typedef struct gradient gradient_t; -typedef struct linear_gradient linear_gradient_t; -typedef struct horizontal_gradient horizontal_gradient_t; -typedef struct vertical_gradient vertical_gradient_t; -typedef struct conical_gradient conical_gradient_t; -typedef struct radial_gradient radial_gradient_t; -typedef struct bits_image bits_image_t; -typedef struct circle circle_t; - -typedef struct argb_t argb_t; - -struct argb_t -{ - float a; - float r; - float g; - float b; -}; - -typedef void (*fetch_scanline_t) (bits_image_t *image, - int x, - int y, - int width, - uint32_t *buffer, - const uint32_t *mask); - -typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image, - int x, - int y); - -typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image, - int x, - int y); - -typedef void (*store_scanline_t) (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values); - -typedef enum -{ - BITS, - LINEAR, - CONICAL, - RADIAL, - SOLID -} image_type_t; - -typedef void (*property_changed_func_t) (pixman_image_t *image); - -struct image_common -{ - image_type_t type; - int32_t ref_count; - pixman_region32_t clip_region; - int32_t alpha_count; /* How many times this image is being used as an alpha map */ - pixman_bool_t have_clip_region; /* FALSE if there is no clip */ - pixman_bool_t client_clip; /* Whether the source clip was - set by a client */ - pixman_bool_t clip_sources; /* Whether the clip applies when - * the image is used as a source - */ - pixman_bool_t dirty; - pixman_transform_t * transform; - pixman_repeat_t repeat; - pixman_filter_t filter; - pixman_fixed_t * filter_params; - int n_filter_params; - bits_image_t * alpha_map; - int alpha_origin_x; - int alpha_origin_y; - pixman_bool_t component_alpha; - property_changed_func_t property_changed; - - pixman_image_destroy_func_t destroy_func; - void * destroy_data; - - uint32_t flags; - pixman_format_code_t extended_format_code; -}; - -struct solid_fill -{ - image_common_t common; - pixman_color_t color; - - uint32_t color_32; - argb_t color_float; -}; - -struct gradient -{ - image_common_t common; - int n_stops; - pixman_gradient_stop_t *stops; -}; - -struct linear_gradient -{ - gradient_t common; - pixman_point_fixed_t p1; - pixman_point_fixed_t p2; -}; - -struct circle -{ - pixman_fixed_t x; - pixman_fixed_t y; - pixman_fixed_t radius; -}; - -struct radial_gradient -{ - gradient_t common; - - circle_t c1; - circle_t c2; - - circle_t delta; - double a; - double inva; - double mindr; -}; - -struct conical_gradient -{ - gradient_t common; - pixman_point_fixed_t center; - double angle; -}; - -struct bits_image -{ - image_common_t common; - pixman_format_code_t format; - const pixman_indexed_t * indexed; - int width; - int height; - uint32_t * bits; - uint32_t * free_me; - int rowstride; /* in number of uint32_t's */ - - pixman_dither_t dither; - uint32_t dither_offset_y; - uint32_t dither_offset_x; - - fetch_scanline_t fetch_scanline_32; - fetch_pixel_32_t fetch_pixel_32; - store_scanline_t store_scanline_32; - - fetch_scanline_t fetch_scanline_float; - fetch_pixel_float_t fetch_pixel_float; - store_scanline_t store_scanline_float; - - /* Used for indirect access to the bits */ - pixman_read_memory_func_t read_func; - pixman_write_memory_func_t write_func; -}; - -union pixman_image -{ - image_type_t type; - image_common_t common; - bits_image_t bits; - gradient_t gradient; - linear_gradient_t linear; - conical_gradient_t conical; - radial_gradient_t radial; - solid_fill_t solid; -}; - -typedef struct pixman_iter_t pixman_iter_t; -typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); -typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); -typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter); - -typedef enum -{ - ITER_NARROW = (1 << 0), - ITER_WIDE = (1 << 1), - - /* "Localized alpha" is when the alpha channel is used only to compute - * the alpha value of the destination. This means that the computation - * of the RGB values of the result is independent of the alpha value. - * - * For example, the OVER operator has localized alpha for the - * destination, because the RGB values of the result can be computed - * without knowing the destination alpha. Similarly, ADD has localized - * alpha for both source and destination because the RGB values of the - * result can be computed without knowing the alpha value of source or - * destination. - * - * When he destination is xRGB, this is useful knowledge, because then - * we can treat it as if it were ARGB, which means in some cases we can - * avoid copying it to a temporary buffer. - */ - ITER_LOCALIZED_ALPHA = (1 << 2), - ITER_IGNORE_ALPHA = (1 << 3), - ITER_IGNORE_RGB = (1 << 4), - - /* These indicate whether the iterator is for a source - * or a destination image - */ - ITER_SRC = (1 << 5), - ITER_DEST = (1 << 6) -} iter_flags_t; - -struct pixman_iter_t -{ - /* These are initialized by _pixman_implementation_{src,dest}_init */ - pixman_image_t * image; - uint32_t * buffer; - int x, y; - int width; - int height; - iter_flags_t iter_flags; - uint32_t image_flags; - - /* These function pointers are initialized by the implementation */ - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; - pixman_iter_fini_t fini; - - /* These fields are scratch data that implementations can use */ - void * data; - uint8_t * bits; - int stride; -}; - -typedef struct pixman_iter_info_t pixman_iter_info_t; -typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, - const pixman_iter_info_t *info); -struct pixman_iter_info_t -{ - pixman_format_code_t format; - uint32_t image_flags; - iter_flags_t iter_flags; - pixman_iter_initializer_t initializer; - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; -}; - -void -_pixman_bits_image_setup_accessors (bits_image_t *image); - -void -_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void -_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void -_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void -_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void -_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void -_pixman_image_init (pixman_image_t *image); - -pixman_bool_t -_pixman_bits_image_init (pixman_image_t * image, - pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride, - pixman_bool_t clear); -pixman_bool_t -_pixman_image_fini (pixman_image_t *image); - -pixman_image_t * -_pixman_image_allocate (void); - -pixman_bool_t -_pixman_init_gradient (gradient_t * gradient, - const pixman_gradient_stop_t *stops, - int n_stops); -void -_pixman_image_reset_clip_region (pixman_image_t *image); - -void -_pixman_image_validate (pixman_image_t *image); - -#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \ - do \ - { \ - uint32_t *__bits__; \ - int __stride__; \ - \ - __bits__ = image->bits.bits; \ - __stride__ = image->bits.rowstride; \ - (out_stride) = \ - __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \ - (line) = \ - ((type *) __bits__) + (out_stride) * (y) + (mul) * (x); \ - } while (0) - -/* - * Gradient walker - */ -typedef struct -{ - float a_s, a_b; - float r_s, r_b; - float g_s, g_b; - float b_s, b_b; - pixman_fixed_48_16_t left_x; - pixman_fixed_48_16_t right_x; - - pixman_gradient_stop_t *stops; - int num_stops; - pixman_repeat_t repeat; - - pixman_bool_t need_reset; -} pixman_gradient_walker_t; - -void -_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, - gradient_t * gradient, - pixman_repeat_t repeat); - -void -_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t pos); - -typedef void (*pixman_gradient_walker_write_t) ( - pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer); - -void -_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer); - -void -_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer); - -typedef void (*pixman_gradient_walker_fill_t) ( - pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer, - uint32_t *end); - -void -_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer, - uint32_t *end); - -void -_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x, - uint32_t *buffer, - uint32_t *end); - -/* - * Edges - */ - -#define MAX_ALPHA(n) ((1 << (n)) - 1) -#define N_Y_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1) -#define N_X_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1) - -#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n)) -#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) - -#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2) -#define Y_FRAC_LAST(n) (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) - -#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n)) -#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) - -#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2) -#define X_FRAC_LAST(n) (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) - -#define RENDER_SAMPLES_X(x, n) \ - ((n) == 1? 0 : (pixman_fixed_frac (x) + \ - X_FRAC_FIRST (n)) / STEP_X_SMALL (n)) - -void -pixman_rasterize_edges_accessors (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b); - -/* - * Implementations - */ -typedef struct pixman_implementation_t pixman_implementation_t; - -typedef struct -{ - pixman_op_t op; - pixman_image_t * src_image; - pixman_image_t * mask_image; - pixman_image_t * dest_image; - int32_t src_x; - int32_t src_y; - int32_t mask_x; - int32_t mask_y; - int32_t dest_x; - int32_t dest_y; - int32_t width; - int32_t height; - - uint32_t src_flags; - uint32_t mask_flags; - uint32_t dest_flags; -} pixman_composite_info_t; - -#define PIXMAN_COMPOSITE_ARGS(info) \ - MAYBE_UNUSED pixman_op_t op = info->op; \ - MAYBE_UNUSED pixman_image_t * src_image = info->src_image; \ - MAYBE_UNUSED pixman_image_t * mask_image = info->mask_image; \ - MAYBE_UNUSED pixman_image_t * dest_image = info->dest_image; \ - MAYBE_UNUSED int32_t src_x = info->src_x; \ - MAYBE_UNUSED int32_t src_y = info->src_y; \ - MAYBE_UNUSED int32_t mask_x = info->mask_x; \ - MAYBE_UNUSED int32_t mask_y = info->mask_y; \ - MAYBE_UNUSED int32_t dest_x = info->dest_x; \ - MAYBE_UNUSED int32_t dest_y = info->dest_y; \ - MAYBE_UNUSED int32_t width = info->width; \ - MAYBE_UNUSED int32_t height = info->height - -typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); - -typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp, - pixman_op_t op, - float * dest, - const float * src, - const float * mask, - int n_pixels); - -typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp, - pixman_composite_info_t *info); -typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height); -typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler); - -void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); -void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); - -typedef struct -{ - pixman_op_t op; - pixman_format_code_t src_format; - uint32_t src_flags; - pixman_format_code_t mask_format; - uint32_t mask_flags; - pixman_format_code_t dest_format; - uint32_t dest_flags; - pixman_composite_func_t func; -} pixman_fast_path_t; - -struct pixman_implementation_t -{ - pixman_implementation_t * toplevel; - pixman_implementation_t * fallback; - const pixman_fast_path_t * fast_paths; - const pixman_iter_info_t * iter_info; - - pixman_blt_func_t blt; - pixman_fill_func_t fill; - - pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS]; - pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; - pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS]; - pixman_combine_float_func_t combine_float_ca[PIXMAN_N_OPERATORS]; -}; - -uint32_t -_pixman_image_get_solid (pixman_implementation_t *imp, - pixman_image_t * image, - pixman_format_code_t format); - -pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *fallback, - const pixman_fast_path_t *fast_paths); - -void -_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, - pixman_op_t op, - pixman_format_code_t src_format, - uint32_t src_flags, - pixman_format_code_t mask_format, - uint32_t mask_flags, - pixman_format_code_t dest_format, - uint32_t dest_flags, - pixman_implementation_t **out_imp, - pixman_composite_func_t *out_func); - -pixman_combine_32_func_t -_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, - pixman_op_t op, - pixman_bool_t component_alpha, - pixman_bool_t wide); - -pixman_bool_t -_pixman_implementation_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height); - -pixman_bool_t -_pixman_implementation_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler); - -void -_pixman_implementation_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); - -/* Specific implementations */ -pixman_implementation_t * -_pixman_implementation_create_general (void); - -pixman_implementation_t * -_pixman_implementation_create_fast_path (pixman_implementation_t *fallback); - -pixman_implementation_t * -_pixman_implementation_create_noop (pixman_implementation_t *fallback); - -#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI -pixman_implementation_t * -_pixman_implementation_create_mmx (pixman_implementation_t *fallback); -#endif - -#ifdef USE_SSE2 -pixman_implementation_t * -_pixman_implementation_create_sse2 (pixman_implementation_t *fallback); -#endif - -#ifdef USE_SSSE3 -pixman_implementation_t * -_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); -#endif - -#ifdef USE_ARM_SIMD -pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); -#endif - -#ifdef USE_ARM_NEON -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); -#endif - -#ifdef USE_ARM_A64_NEON -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); -#endif - -#ifdef USE_MIPS_DSPR2 -pixman_implementation_t * -_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback); -#endif - -#ifdef USE_VMX -pixman_implementation_t * -_pixman_implementation_create_vmx (pixman_implementation_t *fallback); -#endif - -pixman_bool_t -_pixman_implementation_disabled (const char *name); - -pixman_implementation_t * -_pixman_x86_get_implementations (pixman_implementation_t *imp); - -pixman_implementation_t * -_pixman_arm_get_implementations (pixman_implementation_t *imp); - -pixman_implementation_t * -_pixman_ppc_get_implementations (pixman_implementation_t *imp); - -pixman_implementation_t * -_pixman_mips_get_implementations (pixman_implementation_t *imp); - -pixman_implementation_t * -_pixman_choose_implementation (void); - -pixman_bool_t -_pixman_disabled (const char *name); - - -/* - * Utilities - */ -pixman_bool_t -_pixman_compute_composite_region32 (pixman_region32_t * region, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); -uint32_t * -_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); - -void -_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); - -/* These "formats" all have depth 0, so they - * will never clash with any real ones - */ -#define PIXMAN_null PIXMAN_FORMAT (0, 0, 0, 0, 0, 0) -#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0) -#define PIXMAN_pixbuf PIXMAN_FORMAT (0, 2, 0, 0, 0, 0) -#define PIXMAN_rpixbuf PIXMAN_FORMAT (0, 3, 0, 0, 0, 0) -#define PIXMAN_unknown PIXMAN_FORMAT (0, 4, 0, 0, 0, 0) -#define PIXMAN_any PIXMAN_FORMAT (0, 5, 0, 0, 0, 0) - -#define PIXMAN_OP_any (PIXMAN_N_OPERATORS + 1) - -#define FAST_PATH_ID_TRANSFORM (1 << 0) -#define FAST_PATH_NO_ALPHA_MAP (1 << 1) -#define FAST_PATH_NO_CONVOLUTION_FILTER (1 << 2) -#define FAST_PATH_NO_PAD_REPEAT (1 << 3) -#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4) -#define FAST_PATH_NO_ACCESSORS (1 << 5) -#define FAST_PATH_NARROW_FORMAT (1 << 6) -#define FAST_PATH_COMPONENT_ALPHA (1 << 8) -#define FAST_PATH_SAMPLES_OPAQUE (1 << 7) -#define FAST_PATH_UNIFIED_ALPHA (1 << 9) -#define FAST_PATH_SCALE_TRANSFORM (1 << 10) -#define FAST_PATH_NEAREST_FILTER (1 << 11) -#define FAST_PATH_HAS_TRANSFORM (1 << 12) -#define FAST_PATH_IS_OPAQUE (1 << 13) -#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14) -#define FAST_PATH_NO_NONE_REPEAT (1 << 15) -#define FAST_PATH_X_UNIT_POSITIVE (1 << 16) -#define FAST_PATH_AFFINE_TRANSFORM (1 << 17) -#define FAST_PATH_Y_UNIT_ZERO (1 << 18) -#define FAST_PATH_BILINEAR_FILTER (1 << 19) -#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20) -#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21) -#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22) -#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23) -#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) -#define FAST_PATH_BITS_IMAGE (1 << 25) -#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26) - -#define FAST_PATH_PAD_REPEAT \ - (FAST_PATH_NO_NONE_REPEAT | \ - FAST_PATH_NO_NORMAL_REPEAT | \ - FAST_PATH_NO_REFLECT_REPEAT) - -#define FAST_PATH_NORMAL_REPEAT \ - (FAST_PATH_NO_NONE_REPEAT | \ - FAST_PATH_NO_PAD_REPEAT | \ - FAST_PATH_NO_REFLECT_REPEAT) - -#define FAST_PATH_NONE_REPEAT \ - (FAST_PATH_NO_NORMAL_REPEAT | \ - FAST_PATH_NO_PAD_REPEAT | \ - FAST_PATH_NO_REFLECT_REPEAT) - -#define FAST_PATH_REFLECT_REPEAT \ - (FAST_PATH_NO_NONE_REPEAT | \ - FAST_PATH_NO_NORMAL_REPEAT | \ - FAST_PATH_NO_PAD_REPEAT) - -#define FAST_PATH_STANDARD_FLAGS \ - (FAST_PATH_NO_CONVOLUTION_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NARROW_FORMAT) - -#define FAST_PATH_STD_DEST_FLAGS \ - (FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NARROW_FORMAT) - -#define SOURCE_FLAGS(format) \ - (FAST_PATH_STANDARD_FLAGS | \ - ((PIXMAN_ ## format == PIXMAN_solid) ? \ - 0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM))) - -#define MASK_FLAGS(format, extra) \ - ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra)) - -#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \ - PIXMAN_OP_ ## op, \ - PIXMAN_ ## src, \ - src_flags, \ - PIXMAN_ ## mask, \ - mask_flags, \ - PIXMAN_ ## dest, \ - dest_flags, \ - func - -#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func) \ - { FAST_PATH ( \ - op, \ - src, SOURCE_FLAGS (src), \ - mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA), \ - dest, FAST_PATH_STD_DEST_FLAGS, \ - func) } - -#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func) \ - { FAST_PATH ( \ - op, \ - src, SOURCE_FLAGS (src), \ - mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA), \ - dest, FAST_PATH_STD_DEST_FLAGS, \ - func) } - -extern pixman_implementation_t *global_implementation; - -static force_inline pixman_implementation_t * -get_implementation (void) -{ -#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR - if (!global_implementation) - global_implementation = _pixman_choose_implementation (); -#endif - return global_implementation; -} - -/* This function is exported for the sake of the test suite and not part - * of the ABI. - */ -PIXMAN_EXPORT pixman_implementation_t * -_pixman_internal_only_get_implementation (void); - -/* Memory allocation helpers */ -void * -pixman_malloc_ab (unsigned int n, unsigned int b); - -void * -pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); - -void * -pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c); - -pixman_bool_t -_pixman_multiply_overflows_size (size_t a, size_t b); - -pixman_bool_t -_pixman_multiply_overflows_int (unsigned int a, unsigned int b); - -pixman_bool_t -_pixman_addition_overflows_int (unsigned int a, unsigned int b); - -/* Compositing utilities */ -void -pixman_expand_to_float (argb_t *dst, - const uint32_t *src, - pixman_format_code_t format, - int width); - -void -pixman_contract_from_float (uint32_t *dst, - const argb_t *src, - int width); - -/* Region Helpers */ -pixman_bool_t -pixman_region32_copy_from_region16 (pixman_region32_t *dst, - pixman_region16_t *src); - -pixman_bool_t -pixman_region16_copy_from_region32 (pixman_region16_t *dst, - pixman_region32_t *src); - -/* Doubly linked lists */ -typedef struct pixman_link_t pixman_link_t; -struct pixman_link_t -{ - pixman_link_t *next; - pixman_link_t *prev; -}; - -typedef struct pixman_list_t pixman_list_t; -struct pixman_list_t -{ - pixman_link_t *head; - pixman_link_t *tail; -}; - -static force_inline void -pixman_list_init (pixman_list_t *list) -{ - list->head = (pixman_link_t *)list; - list->tail = (pixman_link_t *)list; -} - -static force_inline void -pixman_list_prepend (pixman_list_t *list, pixman_link_t *link) -{ - link->next = list->head; - link->prev = (pixman_link_t *)list; - list->head->prev = link; - list->head = link; -} - -static force_inline void -pixman_list_unlink (pixman_link_t *link) -{ - link->prev->next = link->next; - link->next->prev = link->prev; -} - -static force_inline void -pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link) -{ - pixman_list_unlink (link); - pixman_list_prepend (list, link); -} - -/* Misc macros */ - -#ifndef FALSE -# define FALSE 0 -#endif - -#ifndef TRUE -# define TRUE 1 -#endif - -#ifndef MIN -# define MIN(a, b) ((a < b) ? a : b) -#endif - -#ifndef MAX -# define MAX(a, b) ((a > b) ? a : b) -#endif - -/* Integer division that rounds towards -infinity */ -#define DIV(a, b) \ - ((((a) < 0) == ((b) < 0)) ? (a) / (b) : \ - ((a) - (b) + 1 - (((b) < 0) << 1)) / (b)) - -/* Modulus that produces the remainder wrt. DIV */ -#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b)) - -#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v))) - -#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) - -/* Conversion between 8888 and 0565 */ - -static force_inline uint16_t -convert_8888_to_0565 (uint32_t s) -{ - /* The following code can be compiled into just 4 instructions on ARM */ - uint32_t a, b; - a = (s >> 3) & 0x1F001F; - b = s & 0xFC00; - a |= a >> 5; - a |= b >> 5; - return (uint16_t)a; -} - -static force_inline uint32_t -convert_0565_to_0888 (uint16_t s) -{ - return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | - ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | - ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); -} - -static force_inline uint32_t -convert_0565_to_8888 (uint16_t s) -{ - return convert_0565_to_0888 (s) | 0xff000000; -} - -/* Trivial versions that are useful in macros */ - -static force_inline uint32_t -convert_8888_to_8888 (uint32_t s) -{ - return s; -} - -static force_inline uint32_t -convert_x888_to_8888 (uint32_t s) -{ - return s | 0xff000000; -} - -static force_inline uint16_t -convert_0565_to_0565 (uint16_t s) -{ - return s; -} - -#define PIXMAN_FORMAT_IS_WIDE(f) \ - (PIXMAN_FORMAT_A (f) > 8 || \ - PIXMAN_FORMAT_R (f) > 8 || \ - PIXMAN_FORMAT_G (f) > 8 || \ - PIXMAN_FORMAT_B (f) > 8 || \ - PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB) - -#ifdef WORDS_BIGENDIAN -# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n)) -# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n)) -#else -# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n)) -# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n)) -#endif - -static force_inline uint32_t -unorm_to_unorm (uint32_t val, int from_bits, int to_bits) -{ - uint32_t result; - - if (from_bits == 0) - return 0; - - /* Delete any extra bits */ - val &= ((1 << from_bits) - 1); - - if (from_bits >= to_bits) - return val >> (from_bits - to_bits); - - /* Start out with the high bit of val in the high bit of result. */ - result = val << (to_bits - from_bits); - - /* Copy the bits in result, doubling the number of bits each time, until - * we fill all to_bits. Unrolled manually because from_bits and to_bits - * are usually known statically, so the compiler can turn all of this - * into a few shifts. - */ -#define REPLICATE() \ - do \ - { \ - if (from_bits < to_bits) \ - { \ - result |= result >> from_bits; \ - \ - from_bits *= 2; \ - } \ - } \ - while (0) - - REPLICATE(); - REPLICATE(); - REPLICATE(); - REPLICATE(); - REPLICATE(); - - return result; -} - -uint16_t pixman_float_to_unorm (float f, int n_bits); -float pixman_unorm_to_float (uint16_t u, int n_bits); - -/* - * Various debugging code - */ - -#undef DEBUG - -#define COMPILE_TIME_ASSERT(x) \ - do { typedef int compile_time_assertion [(x)?1:-1]; } while (0) - -/* Turn on debugging depending on what type of release this is - */ -#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1)) - -/* Debugging gets turned on for development releases because these - * are the things that end up in bleeding edge distributions such - * as Rawhide etc. - * - * For performance reasons we don't turn it on for stable releases or - * random git checkouts. (Random git checkouts are often used for - * performance work). - */ - -# define DEBUG - -#endif - -void -_pixman_log_error (const char *function, const char *message); - -#define return_if_fail(expr) \ - do \ - { \ - if (unlikely (!(expr))) \ - { \ - _pixman_log_error (FUNC, "The expression " # expr " was false"); \ - return; \ - } \ - } \ - while (0) - -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (unlikely (!(expr))) \ - { \ - _pixman_log_error (FUNC, "The expression " # expr " was false"); \ - return (retval); \ - } \ - } \ - while (0) - -#define critical_if_fail(expr) \ - do \ - { \ - if (unlikely (!(expr))) \ - _pixman_log_error (FUNC, "The expression " # expr " was false"); \ - } \ - while (0) - -/* - * Matrix - */ - -typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; - -PIXMAN_EXPORT -pixman_bool_t -pixman_transform_point_31_16 (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result); - -PIXMAN_EXPORT -void -pixman_transform_point_31_16_3d (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result); - -PIXMAN_EXPORT -void -pixman_transform_point_31_16_affine (const pixman_transform_t *t, - const pixman_vector_48_16_t *v, - pixman_vector_48_16_t *result); - -/* - * Timers - */ - -#ifdef PIXMAN_TIMERS - -static inline uint64_t -oil_profile_stamp_rdtsc (void) -{ - uint32_t hi, lo; - - __asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi)); - - return lo | (((uint64_t)hi) << 32); -} - -#define OIL_STAMP oil_profile_stamp_rdtsc - -typedef struct pixman_timer_t pixman_timer_t; - -struct pixman_timer_t -{ - int initialized; - const char * name; - uint64_t n_times; - uint64_t total; - pixman_timer_t *next; -}; - -extern int timer_defined; - -void pixman_timer_register (pixman_timer_t *timer); - -#define TIMER_BEGIN(tname) \ - { \ - static pixman_timer_t timer ## tname; \ - uint64_t begin ## tname; \ - \ - if (!timer ## tname.initialized) \ - { \ - timer ## tname.initialized = 1; \ - timer ## tname.name = # tname; \ - pixman_timer_register (&timer ## tname); \ - } \ - \ - timer ## tname.n_times++; \ - begin ## tname = OIL_STAMP (); - -#define TIMER_END(tname) \ - timer ## tname.total += OIL_STAMP () - begin ## tname; \ - } - -#else - -#define TIMER_BEGIN(tname) -#define TIMER_END(tname) - -#endif /* PIXMAN_TIMERS */ - -#endif /* __ASSEMBLER__ */ - -#endif /* PIXMAN_PRIVATE_H */ diff --git a/vendor/pixman/pixman/pixman-radial-gradient.c b/vendor/pixman/pixman/pixman-radial-gradient.c deleted file mode 100644 index 38e1052f3..000000000 --- a/vendor/pixman/pixman/pixman-radial-gradient.c +++ /dev/null @@ -1,509 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * Copyright © 2000 SuSE, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * Copyright © 2007 Red Hat, Inc. - * - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include "pixman-private.h" - -static inline pixman_fixed_32_32_t -dot (pixman_fixed_48_16_t x1, - pixman_fixed_48_16_t y1, - pixman_fixed_48_16_t z1, - pixman_fixed_48_16_t x2, - pixman_fixed_48_16_t y2, - pixman_fixed_48_16_t z2) -{ - /* - * Exact computation, assuming that the input values can - * be represented as pixman_fixed_16_16_t - */ - return x1 * x2 + y1 * y2 + z1 * z2; -} - -static inline double -fdot (double x1, - double y1, - double z1, - double x2, - double y2, - double z2) -{ - /* - * Error can be unbound in some special cases. - * Using clever dot product algorithms (for example compensated - * dot product) would improve this but make the code much less - * obvious - */ - return x1 * x2 + y1 * y2 + z1 * z2; -} - -static void -radial_write_color (double a, - double b, - double c, - double inva, - double dr, - double mindr, - pixman_gradient_walker_t *walker, - pixman_repeat_t repeat, - int Bpp, - pixman_gradient_walker_write_t write_pixel, - uint32_t *buffer) -{ - /* - * In this function error propagation can lead to bad results: - * - discr can have an unbound error (if b*b-a*c is very small), - * potentially making it the opposite sign of what it should have been - * (thus clearing a pixel that would have been colored or vice-versa) - * or propagating the error to sqrtdiscr; - * if discr has the wrong sign or b is very small, this can lead to bad - * results - * - * - the algorithm used to compute the solutions of the quadratic - * equation is not numerically stable (but saves one division compared - * to the numerically stable one); - * this can be a problem if a*c is much smaller than b*b - * - * - the above problems are worse if a is small (as inva becomes bigger) - */ - double discr; - - if (a == 0) - { - double t; - - if (b == 0) - { - memset (buffer, 0, Bpp); - return; - } - - t = pixman_fixed_1 / 2 * c / b; - if (repeat == PIXMAN_REPEAT_NONE) - { - if (0 <= t && t <= pixman_fixed_1) - { - write_pixel (walker, t, buffer); - return; - } - } - else - { - if (t * dr >= mindr) - { - write_pixel (walker, t, buffer); - return; - } - } - - memset (buffer, 0, Bpp); - return; - } - - discr = fdot (b, a, 0, b, -c, 0); - if (discr >= 0) - { - double sqrtdiscr, t0, t1; - - sqrtdiscr = sqrt (discr); - t0 = (b + sqrtdiscr) * inva; - t1 = (b - sqrtdiscr) * inva; - - /* - * The root that must be used is the biggest one that belongs - * to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any - * solution that results in a positive radius otherwise). - * - * If a > 0, t0 is the biggest solution, so if it is valid, it - * is the correct result. - * - * If a < 0, only one of the solutions can be valid, so the - * order in which they are tested is not important. - */ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (0 <= t0 && t0 <= pixman_fixed_1) - { - write_pixel (walker, t0, buffer); - return; - } - else if (0 <= t1 && t1 <= pixman_fixed_1) - { - write_pixel (walker, t1, buffer); - return; - } - } - else - { - if (t0 * dr >= mindr) - { - write_pixel (walker, t0, buffer); - return; - } - else if (t1 * dr >= mindr) - { - write_pixel (walker, t1, buffer); - return; - } - } - } - - memset (buffer, 0, Bpp); - return; -} - -static uint32_t * -radial_get_scanline (pixman_iter_t *iter, - const uint32_t *mask, - int Bpp, - pixman_gradient_walker_write_t write_pixel) -{ - /* - * Implementation of radial gradients following the PDF specification. - * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference - * Manual (PDF 32000-1:2008 at the time of this writing). - * - * In the radial gradient problem we are given two circles (c₁,r₁) and - * (c₂,r₂) that define the gradient itself. - * - * Mathematically the gradient can be defined as the family of circles - * - * ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂) - * - * excluding those circles whose radius would be < 0. When a point - * belongs to more than one circle, the one with a bigger t is the only - * one that contributes to its color. When a point does not belong - * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0). - * Further limitations on the range of values for t are imposed when - * the gradient is not repeated, namely t must belong to [0,1]. - * - * The graphical result is the same as drawing the valid (radius > 0) - * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient - * is not repeated) using SOURCE operator composition. - * - * It looks like a cone pointing towards the viewer if the ending circle - * is smaller than the starting one, a cone pointing inside the page if - * the starting circle is the smaller one and like a cylinder if they - * have the same radius. - * - * What we actually do is, given the point whose color we are interested - * in, compute the t values for that point, solving for t in: - * - * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂ - * - * Let's rewrite it in a simpler way, by defining some auxiliary - * variables: - * - * cd = c₂ - c₁ - * pd = p - c₁ - * dr = r₂ - r₁ - * length(t·cd - pd) = r₁ + t·dr - * - * which actually means - * - * hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr - * - * or - * - * ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr. - * - * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes: - * - * (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)² - * - * where we can actually expand the squares and solve for t: - * - * t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² = - * = r₁² + 2·r₁·t·dr + t²·dr² - * - * (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t + - * (pdx² + pdy² - r₁²) = 0 - * - * A = cdx² + cdy² - dr² - * B = pdx·cdx + pdy·cdy + r₁·dr - * C = pdx² + pdy² - r₁² - * At² - 2Bt + C = 0 - * - * The solutions (unless the equation degenerates because of A = 0) are: - * - * t = (B ± ⎷(B² - A·C)) / A - * - * The solution we are going to prefer is the bigger one, unless the - * radius associated to it is negative (or it falls outside the valid t - * range). - * - * Additional observations (useful for optimizations): - * A does not depend on p - * - * A < 0 <=> one of the two circles completely contains the other one - * <=> for every p, the radiuses associated with the two t solutions - * have opposite sign - */ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t *buffer = iter->buffer; - - gradient_t *gradient = (gradient_t *)image; - radial_gradient_t *radial = (radial_gradient_t *)image; - uint32_t *end = buffer + width * (Bpp / 4); - pixman_gradient_walker_t walker; - pixman_vector_t v, unit; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - unit.vector[0] = image->common.transform->matrix[0][0]; - unit.vector[1] = image->common.transform->matrix[1][0]; - unit.vector[2] = image->common.transform->matrix[2][0]; - } - else - { - unit.vector[0] = pixman_fixed_1; - unit.vector[1] = 0; - unit.vector[2] = 0; - } - - if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1) - { - /* - * Given: - * - * t = (B ± ⎷(B² - A·C)) / A - * - * where - * - * A = cdx² + cdy² - dr² - * B = pdx·cdx + pdy·cdy + r₁·dr - * C = pdx² + pdy² - r₁² - * det = B² - A·C - * - * Since we have an affine transformation, we know that (pdx, pdy) - * increase linearly with each pixel, - * - * pdx = pdx₀ + n·ux, - * pdy = pdy₀ + n·uy, - * - * we can then express B, C and det through multiple differentiation. - */ - pixman_fixed_32_32_t b, db, c, dc, ddc; - - /* warning: this computation may overflow */ - v.vector[0] -= radial->c1.x; - v.vector[1] -= radial->c1.y; - - /* - * B and C are computed and updated exactly. - * If fdot was used instead of dot, in the worst case it would - * lose 11 bits of precision in each of the multiplication and - * summing up would zero out all the bit that were preserved, - * thus making the result 0 instead of the correct one. - * This would mean a worst case of unbound relative error or - * about 2^10 absolute error - */ - b = dot (v.vector[0], v.vector[1], radial->c1.radius, - radial->delta.x, radial->delta.y, radial->delta.radius); - db = dot (unit.vector[0], unit.vector[1], 0, - radial->delta.x, radial->delta.y, 0); - - c = dot (v.vector[0], v.vector[1], - -((pixman_fixed_48_16_t) radial->c1.radius), - v.vector[0], v.vector[1], radial->c1.radius); - dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0], - 2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1], - 0, - unit.vector[0], unit.vector[1], 0); - ddc = 2 * dot (unit.vector[0], unit.vector[1], 0, - unit.vector[0], unit.vector[1], 0); - - while (buffer < end) - { - if (!mask || *mask++) - { - radial_write_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat, - Bpp, - write_pixel, - buffer); - } - - b += db; - c += dc; - dc += ddc; - buffer += (Bpp / 4); - } - } - else - { - /* projective */ - /* Warning: - * error propagation guarantees are much looser than in the affine case - */ - while (buffer < end) - { - if (!mask || *mask++) - { - if (v.vector[2] != 0) - { - double pdx, pdy, invv2, b, c; - - invv2 = 1. * pixman_fixed_1 / v.vector[2]; - - pdx = v.vector[0] * invv2 - radial->c1.x; - /* / pixman_fixed_1 */ - - pdy = v.vector[1] * invv2 - radial->c1.y; - /* / pixman_fixed_1 */ - - b = fdot (pdx, pdy, radial->c1.radius, - radial->delta.x, radial->delta.y, - radial->delta.radius); - /* / pixman_fixed_1 / pixman_fixed_1 */ - - c = fdot (pdx, pdy, -radial->c1.radius, - pdx, pdy, radial->c1.radius); - /* / pixman_fixed_1 / pixman_fixed_1 */ - - radial_write_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat, - Bpp, - write_pixel, - buffer); - } - else - { - memset (buffer, 0, Bpp); - } - } - - buffer += (Bpp / 4); - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - - iter->y++; - return iter->buffer; -} - -static uint32_t * -radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) -{ - return radial_get_scanline (iter, mask, 4, - _pixman_gradient_walker_write_narrow); -} - -static uint32_t * -radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - return radial_get_scanline (iter, NULL, 16, - _pixman_gradient_walker_write_wide); -} - -void -_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->iter_flags & ITER_NARROW) - iter->get_scanline = radial_get_scanline_narrow; - else - iter->get_scanline = radial_get_scanline_wide; -} - -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner, - const pixman_point_fixed_t * outer, - pixman_fixed_t inner_radius, - pixman_fixed_t outer_radius, - const pixman_gradient_stop_t *stops, - int n_stops) -{ - pixman_image_t *image; - radial_gradient_t *radial; - - image = _pixman_image_allocate (); - - if (!image) - return NULL; - - radial = &image->radial; - - if (!_pixman_init_gradient (&radial->common, stops, n_stops)) - { - free (image); - return NULL; - } - - image->type = RADIAL; - - radial->c1.x = inner->x; - radial->c1.y = inner->y; - radial->c1.radius = inner_radius; - radial->c2.x = outer->x; - radial->c2.y = outer->y; - radial->c2.radius = outer_radius; - - /* warning: this computations may overflow */ - radial->delta.x = radial->c2.x - radial->c1.x; - radial->delta.y = radial->c2.y - radial->c1.y; - radial->delta.radius = radial->c2.radius - radial->c1.radius; - - /* computed exactly, then cast to double -> every bit of the double - representation is correct (53 bits) */ - radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius, - radial->delta.x, radial->delta.y, radial->delta.radius); - if (radial->a != 0) - radial->inva = 1. * pixman_fixed_1 / radial->a; - - radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius; - - return image; -} diff --git a/vendor/pixman/pixman/pixman-region.c b/vendor/pixman/pixman/pixman-region.c deleted file mode 100644 index 537d5fbe4..000000000 --- a/vendor/pixman/pixman/pixman-region.c +++ /dev/null @@ -1,2800 +0,0 @@ -/* - * Copyright 1987, 1988, 1989, 1998 The Open Group - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation. - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Except as contained in this notice, the name of The Open Group shall not be - * used in advertising or otherwise to promote the sale, use or other dealings - * in this Software without prior written authorization from The Open Group. - * - * Copyright 1987, 1988, 1989 by - * Digital Equipment Corporation, Maynard, Massachusetts. - * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose and without fee is hereby granted, - * provided that the above copyright notice appear in all copies and that - * both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of Digital not be - * used in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. - * - * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING - * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL - * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR - * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, - * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Copyright © 1998 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include -#include "pixman-private.h" - -#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects) -/* not a region */ -#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data) -#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) -#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0) -#define PIXREGION_RECTS(reg) \ - ((reg)->data ? (box_type_t *)((reg)->data + 1) \ - : (box_type_t *)&(reg)->extents) -#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1)) -#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i]) -#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects) -#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1) - -#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) -#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2) - -#ifdef DEBUG - -#define GOOD(reg) \ - do \ - { \ - if (!PREFIX (_selfcheck (reg))) \ - _pixman_log_error (FUNC, "Malformed region " # reg); \ - } while (0) - -#else - -#define GOOD(reg) - -#endif - -static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; -static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; -#if defined (__llvm__) && !defined (__clang__) -static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; -#else -static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; -#endif - -static box_type_t *pixman_region_empty_box = - (box_type_t *)&PREFIX (_empty_box_); -static region_data_type_t *pixman_region_empty_data = - (region_data_type_t *)&PREFIX (_empty_data_); -static region_data_type_t *pixman_broken_data = - (region_data_type_t *)&PREFIX (_broken_data_); - -static pixman_bool_t -pixman_break (region_type_t *region); - -/* - * The functions in this file implement the Region abstraction used extensively - * throughout the X11 sample server. A Region is simply a set of disjoint - * (non-overlapping) rectangles, plus an "extent" rectangle which is the - * smallest single rectangle that contains all the non-overlapping rectangles. - * - * A Region is implemented as a "y-x-banded" array of rectangles. This array - * imposes two degrees of order. First, all rectangles are sorted by top side - * y coordinate first (y1), and then by left side x coordinate (x1). - * - * Furthermore, the rectangles are grouped into "bands". Each rectangle in a - * band has the same top y coordinate (y1), and each has the same bottom y - * coordinate (y2). Thus all rectangles in a band differ only in their left - * and right side (x1 and x2). Bands are implicit in the array of rectangles: - * there is no separate list of band start pointers. - * - * The y-x band representation does not minimize rectangles. In particular, - * if a rectangle vertically crosses a band (the rectangle has scanlines in - * the y1 to y2 area spanned by the band), then the rectangle may be broken - * down into two or more smaller rectangles stacked one atop the other. - * - * ----------- ----------- - * | | | | band 0 - * | | -------- ----------- -------- - * | | | | in y-x banded | | | | band 1 - * | | | | form is | | | | - * ----------- | | ----------- -------- - * | | | | band 2 - * -------- -------- - * - * An added constraint on the rectangles is that they must cover as much - * horizontal area as possible: no two rectangles within a band are allowed - * to touch. - * - * Whenever possible, bands will be merged together to cover a greater vertical - * distance (and thus reduce the number of rectangles). Two bands can be merged - * only if the bottom of one touches the top of the other and they have - * rectangles in the same places (of the same width, of course). - * - * Adam de Boor wrote most of the original region code. Joel McCormack - * substantially modified or rewrote most of the core arithmetic routines, and - * added pixman_region_validate in order to support several speed improvements - * to pixman_region_validate_tree. Bob Scheifler changed the representation - * to be more compact when empty or a single rectangle, and did a bunch of - * gratuitous reformatting. Carl Worth did further gratuitous reformatting - * while re-merging the server and client region code into libpixregion. - * Soren Sandmann did even more gratuitous reformatting. - */ - -/* true iff two Boxes overlap */ -#define EXTENTCHECK(r1, r2) \ - (!( ((r1)->x2 <= (r2)->x1) || \ - ((r1)->x1 >= (r2)->x2) || \ - ((r1)->y2 <= (r2)->y1) || \ - ((r1)->y1 >= (r2)->y2) ) ) - -/* true iff (x,y) is in Box */ -#define INBOX(r, x, y) \ - ( ((r)->x2 > x) && \ - ((r)->x1 <= x) && \ - ((r)->y2 > y) && \ - ((r)->y1 <= y) ) - -/* true iff Box r1 contains Box r2 */ -#define SUBSUMES(r1, r2) \ - ( ((r1)->x1 <= (r2)->x1) && \ - ((r1)->x2 >= (r2)->x2) && \ - ((r1)->y1 <= (r2)->y1) && \ - ((r1)->y2 >= (r2)->y2) ) - -static size_t -PIXREGION_SZOF (size_t n) -{ - size_t size = n * sizeof(box_type_t); - - if (n > UINT32_MAX / sizeof(box_type_t)) - return 0; - - if (sizeof(region_data_type_t) > UINT32_MAX - size) - return 0; - - return size + sizeof(region_data_type_t); -} - -static region_data_type_t * -alloc_data (size_t n) -{ - size_t sz = PIXREGION_SZOF (n); - - if (!sz) - return NULL; - - return malloc (sz); -} - -#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data) - -#define RECTALLOC_BAIL(region, n, bail) \ - do \ - { \ - if (!(region)->data || \ - (((region)->data->numRects + (n)) > (region)->data->size)) \ - { \ - if (!pixman_rect_alloc (region, n)) \ - goto bail; \ - } \ - } while (0) - -#define RECTALLOC(region, n) \ - do \ - { \ - if (!(region)->data || \ - (((region)->data->numRects + (n)) > (region)->data->size)) \ - { \ - if (!pixman_rect_alloc (region, n)) { \ - return FALSE; \ - } \ - } \ - } while (0) - -#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \ - do \ - { \ - next_rect->x1 = nx1; \ - next_rect->y1 = ny1; \ - next_rect->x2 = nx2; \ - next_rect->y2 = ny2; \ - next_rect++; \ - } \ - while (0) - -#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \ - do \ - { \ - if (!(region)->data || \ - ((region)->data->numRects == (region)->data->size)) \ - { \ - if (!pixman_rect_alloc (region, 1)) \ - return FALSE; \ - next_rect = PIXREGION_TOP (region); \ - } \ - ADDRECT (next_rect, nx1, ny1, nx2, ny2); \ - region->data->numRects++; \ - critical_if_fail (region->data->numRects <= region->data->size); \ - } while (0) - -#define DOWNSIZE(reg, numRects) \ - do \ - { \ - if (((numRects) < ((reg)->data->size >> 1)) && \ - ((reg)->data->size > 50)) \ - { \ - region_data_type_t * new_data; \ - size_t data_size = PIXREGION_SZOF (numRects); \ - \ - if (!data_size) \ - { \ - new_data = NULL; \ - } \ - else \ - { \ - new_data = (region_data_type_t *) \ - realloc ((reg)->data, data_size); \ - } \ - \ - if (new_data) \ - { \ - new_data->size = (numRects); \ - (reg)->data = new_data; \ - } \ - } \ - } while (0) - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_equal) (const region_type_t *reg1, const region_type_t *reg2) -{ - int i; - box_type_t *rects1; - box_type_t *rects2; - - if (reg1->extents.x1 != reg2->extents.x1) - return FALSE; - - if (reg1->extents.x2 != reg2->extents.x2) - return FALSE; - - if (reg1->extents.y1 != reg2->extents.y1) - return FALSE; - - if (reg1->extents.y2 != reg2->extents.y2) - return FALSE; - - if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2)) - return FALSE; - - rects1 = PIXREGION_RECTS (reg1); - rects2 = PIXREGION_RECTS (reg2); - - for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++) - { - if (rects1[i].x1 != rects2[i].x1) - return FALSE; - - if (rects1[i].x2 != rects2[i].x2) - return FALSE; - - if (rects1[i].y1 != rects2[i].y1) - return FALSE; - - if (rects1[i].y2 != rects2[i].y2) - return FALSE; - } - - return TRUE; -} - -int -PREFIX (_print) (region_type_t *rgn) -{ - int num, size; - int i; - box_type_t * rects; - - num = PIXREGION_NUMRECTS (rgn); - size = PIXREGION_SIZE (rgn); - rects = PIXREGION_RECTS (rgn); - - fprintf (stderr, "num: %d size: %d\n", num, size); - fprintf (stderr, "extents: %d %d %d %d\n", - rgn->extents.x1, - rgn->extents.y1, - rgn->extents.x2, - rgn->extents.y2); - - for (i = 0; i < num; i++) - { - fprintf (stderr, "%d %d %d %d \n", - rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2); - } - - fprintf (stderr, "\n"); - - return(num); -} - - -PIXMAN_EXPORT void -PREFIX (_init) (region_type_t *region) -{ - region->extents = *pixman_region_empty_box; - region->data = pixman_region_empty_data; -} - -PIXMAN_EXPORT void -PREFIX (_init_rect) (region_type_t * region, - int x, - int y, - unsigned int width, - unsigned int height) -{ - region->extents.x1 = x; - region->extents.y1 = y; - region->extents.x2 = x + width; - region->extents.y2 = y + height; - - if (!GOOD_RECT (®ion->extents)) - { - if (BAD_RECT (®ion->extents)) - _pixman_log_error (FUNC, "Invalid rectangle passed"); - PREFIX (_init) (region); - return; - } - - region->data = NULL; -} - -PIXMAN_EXPORT void -PREFIX (_init_with_extents) (region_type_t *region, const box_type_t *extents) -{ - if (!GOOD_RECT (extents)) - { - if (BAD_RECT (extents)) - _pixman_log_error (FUNC, "Invalid rectangle passed"); - PREFIX (_init) (region); - return; - } - region->extents = *extents; - - region->data = NULL; -} - -PIXMAN_EXPORT void -PREFIX (_fini) (region_type_t *region) -{ - GOOD (region); - FREE_DATA (region); -} - -PIXMAN_EXPORT int -PREFIX (_n_rects) (const region_type_t *region) -{ - return PIXREGION_NUMRECTS (region); -} - -PIXMAN_EXPORT box_type_t * -PREFIX (_rectangles) (const region_type_t *region, - int *n_rects) -{ - if (n_rects) - *n_rects = PIXREGION_NUMRECTS (region); - - return PIXREGION_RECTS (region); -} - -static pixman_bool_t -pixman_break (region_type_t *region) -{ - FREE_DATA (region); - - region->extents = *pixman_region_empty_box; - region->data = pixman_broken_data; - - return FALSE; -} - -static pixman_bool_t -pixman_rect_alloc (region_type_t * region, - int n) -{ - region_data_type_t *data; - - if (!region->data) - { - n++; - region->data = alloc_data (n); - - if (!region->data) - return pixman_break (region); - - region->data->numRects = 1; - *PIXREGION_BOXPTR (region) = region->extents; - } - else if (!region->data->size) - { - region->data = alloc_data (n); - - if (!region->data) - return pixman_break (region); - - region->data->numRects = 0; - } - else - { - size_t data_size; - - if (n == 1) - { - n = region->data->numRects; - if (n > 500) /* XXX pick numbers out of a hat */ - n = 250; - } - - n += region->data->numRects; - data_size = PIXREGION_SZOF (n); - - if (!data_size) - { - data = NULL; - } - else - { - data = (region_data_type_t *) - realloc (region->data, PIXREGION_SZOF (n)); - } - - if (!data) - return pixman_break (region); - - region->data = data; - } - - region->data->size = n; - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_copy) (region_type_t *dst, const region_type_t *src) -{ - GOOD (dst); - GOOD (src); - - if (dst == src) - return TRUE; - - dst->extents = src->extents; - - if (!src->data || !src->data->size) - { - FREE_DATA (dst); - dst->data = src->data; - return TRUE; - } - - if (!dst->data || (dst->data->size < src->data->numRects)) - { - FREE_DATA (dst); - - dst->data = alloc_data (src->data->numRects); - - if (!dst->data) - return pixman_break (dst); - - dst->data->size = src->data->numRects; - } - - dst->data->numRects = src->data->numRects; - - memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src), - dst->data->numRects * sizeof(box_type_t)); - - return TRUE; -} - -/*====================================================================== - * Generic Region Operator - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_coalesce -- - * Attempt to merge the boxes in the current band with those in the - * previous one. We are guaranteed that the current band extends to - * the end of the rects array. Used only by pixman_op. - * - * Results: - * The new index for the previous band. - * - * Side Effects: - * If coalescing takes place: - * - rectangles in the previous band will have their y2 fields - * altered. - * - region->data->numRects will be decreased. - * - *----------------------------------------------------------------------- - */ -static inline int -pixman_coalesce (region_type_t * region, /* Region to coalesce */ - int prev_start, /* Index of start of previous band */ - int cur_start) /* Index of start of current band */ -{ - box_type_t *prev_box; /* Current box in previous band */ - box_type_t *cur_box; /* Current box in current band */ - int numRects; /* Number rectangles in both bands */ - int y2; /* Bottom of current band */ - - /* - * Figure out how many rectangles are in the band. - */ - numRects = cur_start - prev_start; - critical_if_fail (numRects == region->data->numRects - cur_start); - - if (!numRects) return cur_start; - - /* - * The bands may only be coalesced if the bottom of the previous - * matches the top scanline of the current. - */ - prev_box = PIXREGION_BOX (region, prev_start); - cur_box = PIXREGION_BOX (region, cur_start); - if (prev_box->y2 != cur_box->y1) return cur_start; - - /* - * Make sure the bands have boxes in the same places. This - * assumes that boxes have been added in such a way that they - * cover the most area possible. I.e. two boxes in a band must - * have some horizontal space between them. - */ - y2 = cur_box->y2; - - do - { - if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) - return (cur_start); - - prev_box++; - cur_box++; - numRects--; - } - while (numRects); - - /* - * The bands may be merged, so set the bottom y of each box - * in the previous band to the bottom y of the current band. - */ - numRects = cur_start - prev_start; - region->data->numRects -= numRects; - - do - { - prev_box--; - prev_box->y2 = y2; - numRects--; - } - while (numRects); - - return prev_start; -} - -/* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */ - -#define COALESCE(new_reg, prev_band, cur_band) \ - do \ - { \ - if (cur_band - prev_band == new_reg->data->numRects - cur_band) \ - prev_band = pixman_coalesce (new_reg, prev_band, cur_band); \ - else \ - prev_band = cur_band; \ - } while (0) - -/*- - *----------------------------------------------------------------------- - * pixman_region_append_non_o -- - * Handle a non-overlapping band for the union and subtract operations. - * Just adds the (top/bottom-clipped) rectangles into the region. - * Doesn't have to check for subsumption or anything. - * - * Results: - * None. - * - * Side Effects: - * region->data->numRects is incremented and the rectangles overwritten - * with the rectangles we're passed. - * - *----------------------------------------------------------------------- - */ -static inline pixman_bool_t -pixman_region_append_non_o (region_type_t * region, - box_type_t * r, - box_type_t * r_end, - int y1, - int y2) -{ - box_type_t *next_rect; - int new_rects; - - new_rects = r_end - r; - - critical_if_fail (y1 < y2); - critical_if_fail (new_rects != 0); - - /* Make sure we have enough space for all rectangles to be added */ - RECTALLOC (region, new_rects); - next_rect = PIXREGION_TOP (region); - region->data->numRects += new_rects; - - do - { - critical_if_fail (r->x1 < r->x2); - ADDRECT (next_rect, r->x1, y1, r->x2, y2); - r++; - } - while (r != r_end); - - return TRUE; -} - -#define FIND_BAND(r, r_band_end, r_end, ry1) \ - do \ - { \ - ry1 = r->y1; \ - r_band_end = r + 1; \ - while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \ - r_band_end++; \ - } \ - } while (0) - -#define APPEND_REGIONS(new_reg, r, r_end) \ - do \ - { \ - int new_rects; \ - if ((new_rects = r_end - r)) { \ - RECTALLOC_BAIL (new_reg, new_rects, bail); \ - memmove ((char *)PIXREGION_TOP (new_reg), (char *)r, \ - new_rects * sizeof(box_type_t)); \ - new_reg->data->numRects += new_rects; \ - } \ - } while (0) - -/*- - *----------------------------------------------------------------------- - * pixman_op -- - * Apply an operation to two regions. Called by pixman_region_union, pixman_region_inverse, - * pixman_region_subtract, pixman_region_intersect.... Both regions MUST have at least one - * rectangle, and cannot be the same object. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * The new region is overwritten. - * overlap set to TRUE if overlap_func ever returns TRUE. - * - * Notes: - * The idea behind this function is to view the two regions as sets. - * Together they cover a rectangle of area that this function divides - * into horizontal bands where points are covered only by one region - * or by both. For the first case, the non_overlap_func is called with - * each the band and the band's upper and lower extents. For the - * second, the overlap_func is called to process the entire band. It - * is responsible for clipping the rectangles in the band, though - * this function provides the boundaries. - * At the end of each band, the new region is coalesced, if possible, - * to reduce the number of rectangles in the region. - * - *----------------------------------------------------------------------- - */ - -typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region, - box_type_t * r1, - box_type_t * r1_end, - box_type_t * r2, - box_type_t * r2_end, - int y1, - int y2); - -static pixman_bool_t -pixman_op (region_type_t * new_reg, /* Place to store result */ - const region_type_t * reg1, /* First region in operation */ - const region_type_t * reg2, /* 2d region in operation */ - overlap_proc_ptr overlap_func, /* Function to call for over- - * lapping bands */ - int append_non1, /* Append non-overlapping bands - * in region 1 ? - */ - int append_non2 /* Append non-overlapping bands - * in region 2 ? - */ - ) -{ - box_type_t *r1; /* Pointer into first region */ - box_type_t *r2; /* Pointer into 2d region */ - box_type_t *r1_end; /* End of 1st region */ - box_type_t *r2_end; /* End of 2d region */ - int ybot; /* Bottom of intersection */ - int ytop; /* Top of intersection */ - region_data_type_t *old_data; /* Old data for new_reg */ - int prev_band; /* Index of start of - * previous band in new_reg */ - int cur_band; /* Index of start of current - * band in new_reg */ - box_type_t * r1_band_end; /* End of current band in r1 */ - box_type_t * r2_band_end; /* End of current band in r2 */ - int top; /* Top of non-overlapping band */ - int bot; /* Bottom of non-overlapping band*/ - int r1y1; /* Temps for r1->y1 and r2->y1 */ - int r2y1; - int new_size; - int numRects; - - /* - * Break any region computed from a broken region - */ - if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) - return pixman_break (new_reg); - - /* - * Initialization: - * set r1, r2, r1_end and r2_end appropriately, save the rectangles - * of the destination region until the end in case it's one of - * the two source regions, then mark the "new" region empty, allocating - * another array of rectangles for it to use. - */ - - r1 = PIXREGION_RECTS (reg1); - new_size = PIXREGION_NUMRECTS (reg1); - r1_end = r1 + new_size; - - numRects = PIXREGION_NUMRECTS (reg2); - r2 = PIXREGION_RECTS (reg2); - r2_end = r2 + numRects; - - critical_if_fail (r1 != r1_end); - critical_if_fail (r2 != r2_end); - - old_data = (region_data_type_t *)NULL; - - if (((new_reg == reg1) && (new_size > 1)) || - ((new_reg == reg2) && (numRects > 1))) - { - old_data = new_reg->data; - new_reg->data = pixman_region_empty_data; - } - - /* guess at new size */ - if (numRects > new_size) - new_size = numRects; - - new_size <<= 1; - - if (!new_reg->data) - new_reg->data = pixman_region_empty_data; - else if (new_reg->data->size) - new_reg->data->numRects = 0; - - if (new_size > new_reg->data->size) - { - if (!pixman_rect_alloc (new_reg, new_size)) - { - free (old_data); - return FALSE; - } - } - - /* - * Initialize ybot. - * In the upcoming loop, ybot and ytop serve different functions depending - * on whether the band being handled is an overlapping or non-overlapping - * band. - * In the case of a non-overlapping band (only one of the regions - * has points in the band), ybot is the bottom of the most recent - * intersection and thus clips the top of the rectangles in that band. - * ytop is the top of the next intersection between the two regions and - * serves to clip the bottom of the rectangles in the current band. - * For an overlapping band (where the two regions intersect), ytop clips - * the top of the rectangles of both regions and ybot clips the bottoms. - */ - - ybot = MIN (r1->y1, r2->y1); - - /* - * prev_band serves to mark the start of the previous band so rectangles - * can be coalesced into larger rectangles. qv. pixman_coalesce, above. - * In the beginning, there is no previous band, so prev_band == cur_band - * (cur_band is set later on, of course, but the first band will always - * start at index 0). prev_band and cur_band must be indices because of - * the possible expansion, and resultant moving, of the new region's - * array of rectangles. - */ - prev_band = 0; - - do - { - /* - * This algorithm proceeds one source-band (as opposed to a - * destination band, which is determined by where the two regions - * intersect) at a time. r1_band_end and r2_band_end serve to mark the - * rectangle after the last one in the current band for their - * respective regions. - */ - critical_if_fail (r1 != r1_end); - critical_if_fail (r2 != r2_end); - - FIND_BAND (r1, r1_band_end, r1_end, r1y1); - FIND_BAND (r2, r2_band_end, r2_end, r2y1); - - /* - * First handle the band that doesn't intersect, if any. - * - * Note that attention is restricted to one band in the - * non-intersecting region at once, so if a region has n - * bands between the current position and the next place it overlaps - * the other, this entire loop will be passed through n times. - */ - if (r1y1 < r2y1) - { - if (append_non1) - { - top = MAX (r1y1, ybot); - bot = MIN (r1->y2, r2y1); - if (top != bot) - { - cur_band = new_reg->data->numRects; - if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot)) - goto bail; - COALESCE (new_reg, prev_band, cur_band); - } - } - ytop = r2y1; - } - else if (r2y1 < r1y1) - { - if (append_non2) - { - top = MAX (r2y1, ybot); - bot = MIN (r2->y2, r1y1); - - if (top != bot) - { - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot)) - goto bail; - - COALESCE (new_reg, prev_band, cur_band); - } - } - ytop = r1y1; - } - else - { - ytop = r1y1; - } - - /* - * Now see if we've hit an intersecting band. The two bands only - * intersect if ybot > ytop - */ - ybot = MIN (r1->y2, r2->y2); - if (ybot > ytop) - { - cur_band = new_reg->data->numRects; - - if (!(*overlap_func)(new_reg, - r1, r1_band_end, - r2, r2_band_end, - ytop, ybot)) - { - goto bail; - } - - COALESCE (new_reg, prev_band, cur_band); - } - - /* - * If we've finished with a band (y2 == ybot) we skip forward - * in the region to the next band. - */ - if (r1->y2 == ybot) - r1 = r1_band_end; - - if (r2->y2 == ybot) - r2 = r2_band_end; - - } - while (r1 != r1_end && r2 != r2_end); - - /* - * Deal with whichever region (if any) still has rectangles left. - * - * We only need to worry about banding and coalescing for the very first - * band left. After that, we can just group all remaining boxes, - * regardless of how many bands, into one final append to the list. - */ - - if ((r1 != r1_end) && append_non1) - { - /* Do first non_overlap1Func call, which may be able to coalesce */ - FIND_BAND (r1, r1_band_end, r1_end, r1y1); - - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o (new_reg, - r1, r1_band_end, - MAX (r1y1, ybot), r1->y2)) - { - goto bail; - } - - COALESCE (new_reg, prev_band, cur_band); - - /* Just append the rest of the boxes */ - APPEND_REGIONS (new_reg, r1_band_end, r1_end); - } - else if ((r2 != r2_end) && append_non2) - { - /* Do first non_overlap2Func call, which may be able to coalesce */ - FIND_BAND (r2, r2_band_end, r2_end, r2y1); - - cur_band = new_reg->data->numRects; - - if (!pixman_region_append_non_o (new_reg, - r2, r2_band_end, - MAX (r2y1, ybot), r2->y2)) - { - goto bail; - } - - COALESCE (new_reg, prev_band, cur_band); - - /* Append rest of boxes */ - APPEND_REGIONS (new_reg, r2_band_end, r2_end); - } - - free (old_data); - - if (!(numRects = new_reg->data->numRects)) - { - FREE_DATA (new_reg); - new_reg->data = pixman_region_empty_data; - } - else if (numRects == 1) - { - new_reg->extents = *PIXREGION_BOXPTR (new_reg); - FREE_DATA (new_reg); - new_reg->data = (region_data_type_t *)NULL; - } - else - { - DOWNSIZE (new_reg, numRects); - } - - return TRUE; - -bail: - free (old_data); - - return pixman_break (new_reg); -} - -/*- - *----------------------------------------------------------------------- - * pixman_set_extents -- - * Reset the extents of a region to what they should be. Called by - * pixman_region_subtract and pixman_region_intersect as they can't - * figure it out along the way or do so easily, as pixman_region_union can. - * - * Results: - * None. - * - * Side Effects: - * The region's 'extents' structure is overwritten. - * - *----------------------------------------------------------------------- - */ -static void -pixman_set_extents (region_type_t *region) -{ - box_type_t *box, *box_end; - - if (!region->data) - return; - - if (!region->data->size) - { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - return; - } - - box = PIXREGION_BOXPTR (region); - box_end = PIXREGION_END (region); - - /* - * Since box is the first rectangle in the region, it must have the - * smallest y1 and since box_end is the last rectangle in the region, - * it must have the largest y2, because of banding. Initialize x1 and - * x2 from box and box_end, resp., as good things to initialize them - * to... - */ - region->extents.x1 = box->x1; - region->extents.y1 = box->y1; - region->extents.x2 = box_end->x2; - region->extents.y2 = box_end->y2; - - critical_if_fail (region->extents.y1 < region->extents.y2); - - while (box <= box_end) - { - if (box->x1 < region->extents.x1) - region->extents.x1 = box->x1; - if (box->x2 > region->extents.x2) - region->extents.x2 = box->x2; - box++; - } - - critical_if_fail (region->extents.x1 < region->extents.x2); -} - -/*====================================================================== - * Region Intersection - *====================================================================*/ -/*- - *----------------------------------------------------------------------- - * pixman_region_intersect_o -- - * Handle an overlapping band for pixman_region_intersect. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * Rectangles may be added to the region. - * - *----------------------------------------------------------------------- - */ -/*ARGSUSED*/ -static pixman_bool_t -pixman_region_intersect_o (region_type_t *region, - box_type_t * r1, - box_type_t * r1_end, - box_type_t * r2, - box_type_t * r2_end, - int y1, - int y2) -{ - int x1; - int x2; - box_type_t * next_rect; - - next_rect = PIXREGION_TOP (region); - - critical_if_fail (y1 < y2); - critical_if_fail (r1 != r1_end && r2 != r2_end); - - do - { - x1 = MAX (r1->x1, r2->x1); - x2 = MIN (r1->x2, r2->x2); - - /* - * If there's any overlap between the two rectangles, add that - * overlap to the new region. - */ - if (x1 < x2) - NEWRECT (region, next_rect, x1, y1, x2, y2); - - /* - * Advance the pointer(s) with the leftmost right side, since the next - * rectangle on that list may still overlap the other region's - * current rectangle. - */ - if (r1->x2 == x2) - { - r1++; - } - if (r2->x2 == x2) - { - r2++; - } - } - while ((r1 != r1_end) && (r2 != r2_end)); - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_intersect) (region_type_t * new_reg, - const region_type_t * reg1, - const region_type_t * reg2) -{ - GOOD (reg1); - GOOD (reg2); - GOOD (new_reg); - - /* check for trivial reject */ - if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) || - !EXTENTCHECK (®1->extents, ®2->extents)) - { - /* Covers about 20% of all cases */ - FREE_DATA (new_reg); - new_reg->extents.x2 = new_reg->extents.x1; - new_reg->extents.y2 = new_reg->extents.y1; - if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) - { - new_reg->data = pixman_broken_data; - return FALSE; - } - else - { - new_reg->data = pixman_region_empty_data; - } - } - else if (!reg1->data && !reg2->data) - { - /* Covers about 80% of cases that aren't trivially rejected */ - new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1); - new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1); - new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2); - new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2); - - FREE_DATA (new_reg); - - new_reg->data = (region_data_type_t *)NULL; - } - else if (!reg2->data && SUBSUMES (®2->extents, ®1->extents)) - { - return PREFIX (_copy) (new_reg, reg1); - } - else if (!reg1->data && SUBSUMES (®1->extents, ®2->extents)) - { - return PREFIX (_copy) (new_reg, reg2); - } - else if (reg1 == reg2) - { - return PREFIX (_copy) (new_reg, reg1); - } - else - { - /* General purpose intersection */ - - if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE)) - return FALSE; - - pixman_set_extents (new_reg); - } - - GOOD (new_reg); - return(TRUE); -} - -#define MERGERECT(r) \ - do \ - { \ - if (r->x1 <= x2) \ - { \ - /* Merge with current rectangle */ \ - if (x2 < r->x2) \ - x2 = r->x2; \ - } \ - else \ - { \ - /* Add current rectangle, start new one */ \ - NEWRECT (region, next_rect, x1, y1, x2, y2); \ - x1 = r->x1; \ - x2 = r->x2; \ - } \ - r++; \ - } while (0) - -/*====================================================================== - * Region Union - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_region_union_o -- - * Handle an overlapping band for the union operation. Picks the - * left-most rectangle each time and merges it into the region. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * region is overwritten. - * overlap is set to TRUE if any boxes overlap. - * - *----------------------------------------------------------------------- - */ -static pixman_bool_t -pixman_region_union_o (region_type_t *region, - box_type_t * r1, - box_type_t * r1_end, - box_type_t * r2, - box_type_t * r2_end, - int y1, - int y2) -{ - box_type_t *next_rect; - int x1; /* left and right side of current union */ - int x2; - - critical_if_fail (y1 < y2); - critical_if_fail (r1 != r1_end && r2 != r2_end); - - next_rect = PIXREGION_TOP (region); - - /* Start off current rectangle */ - if (r1->x1 < r2->x1) - { - x1 = r1->x1; - x2 = r1->x2; - r1++; - } - else - { - x1 = r2->x1; - x2 = r2->x2; - r2++; - } - while (r1 != r1_end && r2 != r2_end) - { - if (r1->x1 < r2->x1) - MERGERECT (r1); - else - MERGERECT (r2); - } - - /* Finish off whoever (if any) is left */ - if (r1 != r1_end) - { - do - { - MERGERECT (r1); - } - while (r1 != r1_end); - } - else if (r2 != r2_end) - { - do - { - MERGERECT (r2); - } - while (r2 != r2_end); - } - - /* Add current rectangle */ - NEWRECT (region, next_rect, x1, y1, x2, y2); - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -PREFIX(_intersect_rect) (region_type_t *dest, - const region_type_t *source, - int x, int y, - unsigned int width, - unsigned int height) -{ - region_type_t region; - - region.data = NULL; - region.extents.x1 = x; - region.extents.y1 = y; - region.extents.x2 = x + width; - region.extents.y2 = y + height; - - return PREFIX(_intersect) (dest, source, ®ion); -} - -/* Convenience function for performing union of region with a - * single rectangle - */ -PIXMAN_EXPORT pixman_bool_t -PREFIX (_union_rect) (region_type_t *dest, - const region_type_t *source, - int x, - int y, - unsigned int width, - unsigned int height) -{ - region_type_t region; - - region.extents.x1 = x; - region.extents.y1 = y; - region.extents.x2 = x + width; - region.extents.y2 = y + height; - - if (!GOOD_RECT (®ion.extents)) - { - if (BAD_RECT (®ion.extents)) - _pixman_log_error (FUNC, "Invalid rectangle passed"); - return PREFIX (_copy) (dest, source); - } - - region.data = NULL; - - return PREFIX (_union) (dest, source, ®ion); -} - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_union) (region_type_t * new_reg, - const region_type_t *reg1, - const region_type_t *reg2) -{ - /* Return TRUE if some overlap - * between reg1, reg2 - */ - GOOD (reg1); - GOOD (reg2); - GOOD (new_reg); - - /* checks all the simple cases */ - - /* - * Region 1 and 2 are the same - */ - if (reg1 == reg2) - return PREFIX (_copy) (new_reg, reg1); - - /* - * Region 1 is empty - */ - if (PIXREGION_NIL (reg1)) - { - if (PIXREGION_NAR (reg1)) - return pixman_break (new_reg); - - if (new_reg != reg2) - return PREFIX (_copy) (new_reg, reg2); - - return TRUE; - } - - /* - * Region 2 is empty - */ - if (PIXREGION_NIL (reg2)) - { - if (PIXREGION_NAR (reg2)) - return pixman_break (new_reg); - - if (new_reg != reg1) - return PREFIX (_copy) (new_reg, reg1); - - return TRUE; - } - - /* - * Region 1 completely subsumes region 2 - */ - if (!reg1->data && SUBSUMES (®1->extents, ®2->extents)) - { - if (new_reg != reg1) - return PREFIX (_copy) (new_reg, reg1); - - return TRUE; - } - - /* - * Region 2 completely subsumes region 1 - */ - if (!reg2->data && SUBSUMES (®2->extents, ®1->extents)) - { - if (new_reg != reg2) - return PREFIX (_copy) (new_reg, reg2); - - return TRUE; - } - - if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE)) - return FALSE; - - new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1); - new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1); - new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2); - new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2); - - GOOD (new_reg); - - return TRUE; -} - -/*====================================================================== - * Batch Rectangle Union - *====================================================================*/ - -#define EXCHANGE_RECTS(a, b) \ - { \ - box_type_t t; \ - t = rects[a]; \ - rects[a] = rects[b]; \ - rects[b] = t; \ - } - -static void -quick_sort_rects ( - box_type_t rects[], - int numRects) -{ - int y1; - int x1; - int i, j; - box_type_t *r; - - /* Always called with numRects > 1 */ - - do - { - if (numRects == 2) - { - if (rects[0].y1 > rects[1].y1 || - (rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1)) - { - EXCHANGE_RECTS (0, 1); - } - - return; - } - - /* Choose partition element, stick in location 0 */ - EXCHANGE_RECTS (0, numRects >> 1); - y1 = rects[0].y1; - x1 = rects[0].x1; - - /* Partition array */ - i = 0; - j = numRects; - - do - { - r = &(rects[i]); - do - { - r++; - i++; - } - while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))); - - r = &(rects[j]); - do - { - r--; - j--; - } - while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1)); - - if (i < j) - EXCHANGE_RECTS (i, j); - } - while (i < j); - - /* Move partition element back to middle */ - EXCHANGE_RECTS (0, j); - - /* Recurse */ - if (numRects - j - 1 > 1) - quick_sort_rects (&rects[j + 1], numRects - j - 1); - - numRects = j; - } - while (numRects > 1); -} - -/*- - *----------------------------------------------------------------------- - * pixman_region_validate -- - * - * Take a ``region'' which is a non-y-x-banded random collection of - * rectangles, and compute a nice region which is the union of all the - * rectangles. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * The passed-in ``region'' may be modified. - * overlap set to TRUE if any retangles overlapped, - * else FALSE; - * - * Strategy: - * Step 1. Sort the rectangles into ascending order with primary key y1 - * and secondary key x1. - * - * Step 2. Split the rectangles into the minimum number of proper y-x - * banded regions. This may require horizontally merging - * rectangles, and vertically coalescing bands. With any luck, - * this step in an identity transformation (ala the Box widget), - * or a coalescing into 1 box (ala Menus). - * - * Step 3. Merge the separate regions down to a single region by calling - * pixman_region_union. Maximize the work each pixman_region_union call does by using - * a binary merge. - * - *----------------------------------------------------------------------- - */ - -static pixman_bool_t -validate (region_type_t * badreg) -{ - /* Descriptor for regions under construction in Step 2. */ - typedef struct - { - region_type_t reg; - int prev_band; - int cur_band; - } region_info_t; - - region_info_t stack_regions[64]; - - int numRects; /* Original numRects for badreg */ - region_info_t *ri; /* Array of current regions */ - int num_ri; /* Number of entries used in ri */ - int size_ri; /* Number of entries available in ri */ - int i; /* Index into rects */ - int j; /* Index into ri */ - region_info_t *rit; /* &ri[j] */ - region_type_t *reg; /* ri[j].reg */ - box_type_t *box; /* Current box in rects */ - box_type_t *ri_box; /* Last box in ri[j].reg */ - region_type_t *hreg; /* ri[j_half].reg */ - pixman_bool_t ret = TRUE; - - if (!badreg->data) - { - GOOD (badreg); - return TRUE; - } - - numRects = badreg->data->numRects; - if (!numRects) - { - if (PIXREGION_NAR (badreg)) - return FALSE; - GOOD (badreg); - return TRUE; - } - - if (badreg->extents.x1 < badreg->extents.x2) - { - if ((numRects) == 1) - { - FREE_DATA (badreg); - badreg->data = (region_data_type_t *) NULL; - } - else - { - DOWNSIZE (badreg, numRects); - } - - GOOD (badreg); - - return TRUE; - } - - /* Step 1: Sort the rects array into ascending (y1, x1) order */ - quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects); - - /* Step 2: Scatter the sorted array into the minimum number of regions */ - - /* Set up the first region to be the first rectangle in badreg */ - /* Note that step 2 code will never overflow the ri[0].reg rects array */ - ri = stack_regions; - size_ri = sizeof (stack_regions) / sizeof (stack_regions[0]); - num_ri = 1; - ri[0].prev_band = 0; - ri[0].cur_band = 0; - ri[0].reg = *badreg; - box = PIXREGION_BOXPTR (&ri[0].reg); - ri[0].reg.extents = *box; - ri[0].reg.data->numRects = 1; - badreg->extents = *pixman_region_empty_box; - badreg->data = pixman_region_empty_data; - - /* Now scatter rectangles into the minimum set of valid regions. If the - * next rectangle to be added to a region would force an existing rectangle - * in the region to be split up in order to maintain y-x banding, just - * forget it. Try the next region. If it doesn't fit cleanly into any - * region, make a new one. - */ - - for (i = numRects; --i > 0;) - { - box++; - /* Look for a region to append box to */ - for (j = num_ri, rit = ri; --j >= 0; rit++) - { - reg = &rit->reg; - ri_box = PIXREGION_END (reg); - - if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2) - { - /* box is in same band as ri_box. Merge or append it */ - if (box->x1 <= ri_box->x2) - { - /* Merge it with ri_box */ - if (box->x2 > ri_box->x2) - ri_box->x2 = box->x2; - } - else - { - RECTALLOC_BAIL (reg, 1, bail); - *PIXREGION_TOP (reg) = *box; - reg->data->numRects++; - } - - goto next_rect; /* So sue me */ - } - else if (box->y1 >= ri_box->y2) - { - /* Put box into new band */ - if (reg->extents.x2 < ri_box->x2) - reg->extents.x2 = ri_box->x2; - - if (reg->extents.x1 > box->x1) - reg->extents.x1 = box->x1; - - COALESCE (reg, rit->prev_band, rit->cur_band); - rit->cur_band = reg->data->numRects; - RECTALLOC_BAIL (reg, 1, bail); - *PIXREGION_TOP (reg) = *box; - reg->data->numRects++; - - goto next_rect; - } - /* Well, this region was inappropriate. Try the next one. */ - } /* for j */ - - /* Uh-oh. No regions were appropriate. Create a new one. */ - if (size_ri == num_ri) - { - size_t data_size; - - /* Oops, allocate space for new region information */ - size_ri <<= 1; - - data_size = size_ri * sizeof(region_info_t); - if (data_size / size_ri != sizeof(region_info_t)) - goto bail; - - if (ri == stack_regions) - { - rit = malloc (data_size); - if (!rit) - goto bail; - memcpy (rit, ri, num_ri * sizeof (region_info_t)); - } - else - { - rit = (region_info_t *) realloc (ri, data_size); - if (!rit) - goto bail; - } - ri = rit; - rit = &ri[num_ri]; - } - num_ri++; - rit->prev_band = 0; - rit->cur_band = 0; - rit->reg.extents = *box; - rit->reg.data = (region_data_type_t *)NULL; - - /* MUST force allocation */ - if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri)) - goto bail; - - next_rect: ; - } /* for i */ - - /* Make a final pass over each region in order to COALESCE and set - * extents.x2 and extents.y2 - */ - for (j = num_ri, rit = ri; --j >= 0; rit++) - { - reg = &rit->reg; - ri_box = PIXREGION_END (reg); - reg->extents.y2 = ri_box->y2; - - if (reg->extents.x2 < ri_box->x2) - reg->extents.x2 = ri_box->x2; - - COALESCE (reg, rit->prev_band, rit->cur_band); - - if (reg->data->numRects == 1) /* keep unions happy below */ - { - FREE_DATA (reg); - reg->data = (region_data_type_t *)NULL; - } - } - - /* Step 3: Union all regions into a single region */ - while (num_ri > 1) - { - int half = num_ri / 2; - for (j = num_ri & 1; j < (half + (num_ri & 1)); j++) - { - reg = &ri[j].reg; - hreg = &ri[j + half].reg; - - if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE)) - ret = FALSE; - - if (hreg->extents.x1 < reg->extents.x1) - reg->extents.x1 = hreg->extents.x1; - - if (hreg->extents.y1 < reg->extents.y1) - reg->extents.y1 = hreg->extents.y1; - - if (hreg->extents.x2 > reg->extents.x2) - reg->extents.x2 = hreg->extents.x2; - - if (hreg->extents.y2 > reg->extents.y2) - reg->extents.y2 = hreg->extents.y2; - - FREE_DATA (hreg); - } - - num_ri -= half; - - if (!ret) - goto bail; - } - - *badreg = ri[0].reg; - - if (ri != stack_regions) - free (ri); - - GOOD (badreg); - return ret; - -bail: - for (i = 0; i < num_ri; i++) - FREE_DATA (&ri[i].reg); - - if (ri != stack_regions) - free (ri); - - return pixman_break (badreg); -} - -/*====================================================================== - * Region Subtraction - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_region_subtract_o -- - * Overlapping band subtraction. x1 is the left-most point not yet - * checked. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * region may have rectangles added to it. - * - *----------------------------------------------------------------------- - */ -/*ARGSUSED*/ -static pixman_bool_t -pixman_region_subtract_o (region_type_t * region, - box_type_t * r1, - box_type_t * r1_end, - box_type_t * r2, - box_type_t * r2_end, - int y1, - int y2) -{ - box_type_t * next_rect; - int x1; - - x1 = r1->x1; - - critical_if_fail (y1 < y2); - critical_if_fail (r1 != r1_end && r2 != r2_end); - - next_rect = PIXREGION_TOP (region); - - do - { - if (r2->x2 <= x1) - { - /* - * Subtrahend entirely to left of minuend: go to next subtrahend. - */ - r2++; - } - else if (r2->x1 <= x1) - { - /* - * Subtrahend precedes minuend: nuke left edge of minuend. - */ - x1 = r2->x2; - if (x1 >= r1->x2) - { - /* - * Minuend completely covered: advance to next minuend and - * reset left fence to edge of new minuend. - */ - r1++; - if (r1 != r1_end) - x1 = r1->x1; - } - else - { - /* - * Subtrahend now used up since it doesn't extend beyond - * minuend - */ - r2++; - } - } - else if (r2->x1 < r1->x2) - { - /* - * Left part of subtrahend covers part of minuend: add uncovered - * part of minuend to region and skip to next subtrahend. - */ - critical_if_fail (x1 < r2->x1); - NEWRECT (region, next_rect, x1, y1, r2->x1, y2); - - x1 = r2->x2; - if (x1 >= r1->x2) - { - /* - * Minuend used up: advance to new... - */ - r1++; - if (r1 != r1_end) - x1 = r1->x1; - } - else - { - /* - * Subtrahend used up - */ - r2++; - } - } - else - { - /* - * Minuend used up: add any remaining piece before advancing. - */ - if (r1->x2 > x1) - NEWRECT (region, next_rect, x1, y1, r1->x2, y2); - - r1++; - - if (r1 != r1_end) - x1 = r1->x1; - } - } - while ((r1 != r1_end) && (r2 != r2_end)); - - /* - * Add remaining minuend rectangles to region. - */ - while (r1 != r1_end) - { - critical_if_fail (x1 < r1->x2); - - NEWRECT (region, next_rect, x1, y1, r1->x2, y2); - - r1++; - if (r1 != r1_end) - x1 = r1->x1; - } - return TRUE; -} - -/*- - *----------------------------------------------------------------------- - * pixman_region_subtract -- - * Subtract reg_s from reg_m and leave the result in reg_d. - * S stands for subtrahend, M for minuend and D for difference. - * - * Results: - * TRUE if successful. - * - * Side Effects: - * reg_d is overwritten. - * - *----------------------------------------------------------------------- - */ -PIXMAN_EXPORT pixman_bool_t -PREFIX (_subtract) (region_type_t * reg_d, - const region_type_t *reg_m, - const region_type_t *reg_s) -{ - GOOD (reg_m); - GOOD (reg_s); - GOOD (reg_d); - - /* check for trivial rejects */ - if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) || - !EXTENTCHECK (®_m->extents, ®_s->extents)) - { - if (PIXREGION_NAR (reg_s)) - return pixman_break (reg_d); - - return PREFIX (_copy) (reg_d, reg_m); - } - else if (reg_m == reg_s) - { - FREE_DATA (reg_d); - reg_d->extents.x2 = reg_d->extents.x1; - reg_d->extents.y2 = reg_d->extents.y1; - reg_d->data = pixman_region_empty_data; - - return TRUE; - } - - /* Add those rectangles in region 1 that aren't in region 2, - do yucky subtraction for overlaps, and - just throw away rectangles in region 2 that aren't in region 1 */ - if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) - return FALSE; - - /* - * Can't alter reg_d's extents before we call pixman_op because - * it might be one of the source regions and pixman_op depends - * on the extents of those regions being unaltered. Besides, this - * way there's no checking against rectangles that will be nuked - * due to coalescing, so we have to examine fewer rectangles. - */ - pixman_set_extents (reg_d); - GOOD (reg_d); - return TRUE; -} - -/*====================================================================== - * Region Inversion - *====================================================================*/ - -/*- - *----------------------------------------------------------------------- - * pixman_region_inverse -- - * Take a region and a box and return a region that is everything - * in the box but not in the region. The careful reader will note - * that this is the same as subtracting the region from the box... - * - * Results: - * TRUE. - * - * Side Effects: - * new_reg is overwritten. - * - *----------------------------------------------------------------------- - */ -PIXMAN_EXPORT pixman_bool_t -PREFIX (_inverse) (region_type_t * new_reg, /* Destination region */ - const region_type_t *reg1, /* Region to invert */ - const box_type_t * inv_rect) /* Bounding box for inversion */ -{ - region_type_t inv_reg; /* Quick and dirty region made from the - * bounding box */ - GOOD (reg1); - GOOD (new_reg); - - /* check for trivial rejects */ - if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, ®1->extents)) - { - if (PIXREGION_NAR (reg1)) - return pixman_break (new_reg); - - new_reg->extents = *inv_rect; - FREE_DATA (new_reg); - new_reg->data = (region_data_type_t *)NULL; - - return TRUE; - } - - /* Add those rectangles in region 1 that aren't in region 2, - * do yucky subtraction for overlaps, and - * just throw away rectangles in region 2 that aren't in region 1 - */ - inv_reg.extents = *inv_rect; - inv_reg.data = (region_data_type_t *)NULL; - if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE)) - return FALSE; - - /* - * Can't alter new_reg's extents before we call pixman_op because - * it might be one of the source regions and pixman_op depends - * on the extents of those regions being unaltered. Besides, this - * way there's no checking against rectangles that will be nuked - * due to coalescing, so we have to examine fewer rectangles. - */ - pixman_set_extents (new_reg); - GOOD (new_reg); - return TRUE; -} - -/* In time O(log n), locate the first box whose y2 is greater than y. - * Return @end if no such box exists. - */ -static box_type_t * -find_box_for_y (box_type_t *begin, box_type_t *end, int y) -{ - box_type_t *mid; - - if (end == begin) - return end; - - if (end - begin == 1) - { - if (begin->y2 > y) - return begin; - else - return end; - } - - mid = begin + (end - begin) / 2; - if (mid->y2 > y) - { - /* If no box is found in [begin, mid], the function - * will return @mid, which is then known to be the - * correct answer. - */ - return find_box_for_y (begin, mid, y); - } - else - { - return find_box_for_y (mid, end, y); - } -} - -/* - * rect_in(region, rect) - * This routine takes a pointer to a region and a pointer to a box - * and determines if the box is outside/inside/partly inside the region. - * - * The idea is to travel through the list of rectangles trying to cover the - * passed box with them. Anytime a piece of the rectangle isn't covered - * by a band of rectangles, part_out is set TRUE. Any time a rectangle in - * the region covers part of the box, part_in is set TRUE. The process ends - * when either the box has been completely covered (we reached a band that - * doesn't overlap the box, part_in is TRUE and part_out is false), the - * box has been partially covered (part_in == part_out == TRUE -- because of - * the banding, the first time this is true we know the box is only - * partially in the region) or is outside the region (we reached a band - * that doesn't overlap the box at all and part_in is false) - */ -PIXMAN_EXPORT pixman_region_overlap_t -PREFIX (_contains_rectangle) (const region_type_t * region, - const box_type_t * prect) -{ - box_type_t * pbox; - box_type_t * pbox_end; - int part_in, part_out; - int numRects; - int x, y; - - GOOD (region); - - numRects = PIXREGION_NUMRECTS (region); - - /* useful optimization */ - if (!numRects || !EXTENTCHECK (®ion->extents, prect)) - return(PIXMAN_REGION_OUT); - - if (numRects == 1) - { - /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */ - if (SUBSUMES (®ion->extents, prect)) - return(PIXMAN_REGION_IN); - else - return(PIXMAN_REGION_PART); - } - - part_out = FALSE; - part_in = FALSE; - - /* (x,y) starts at upper left of rect, moving to the right and down */ - x = prect->x1; - y = prect->y1; - - /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */ - for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; - pbox != pbox_end; - pbox++) - { - /* getting up to speed or skipping remainder of band */ - if (pbox->y2 <= y) - { - if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end) - break; - } - - if (pbox->y1 > y) - { - part_out = TRUE; /* missed part of rectangle above */ - if (part_in || (pbox->y1 >= prect->y2)) - break; - y = pbox->y1; /* x guaranteed to be == prect->x1 */ - } - - if (pbox->x2 <= x) - continue; /* not far enough over yet */ - - if (pbox->x1 > x) - { - part_out = TRUE; /* missed part of rectangle to left */ - if (part_in) - break; - } - - if (pbox->x1 < prect->x2) - { - part_in = TRUE; /* definitely overlap */ - if (part_out) - break; - } - - if (pbox->x2 >= prect->x2) - { - y = pbox->y2; /* finished with this band */ - if (y >= prect->y2) - break; - x = prect->x1; /* reset x out to left again */ - } - else - { - /* - * Because boxes in a band are maximal width, if the first box - * to overlap the rectangle doesn't completely cover it in that - * band, the rectangle must be partially out, since some of it - * will be uncovered in that band. part_in will have been set true - * by now... - */ - part_out = TRUE; - break; - } - } - - if (part_in) - { - if (y < prect->y2) - return PIXMAN_REGION_PART; - else - return PIXMAN_REGION_IN; - } - else - { - return PIXMAN_REGION_OUT; - } -} - -/* PREFIX(_translate) (region, x, y) - * translates in place - */ - -PIXMAN_EXPORT void -PREFIX (_translate) (region_type_t *region, int x, int y) -{ - overflow_int_t x1, x2, y1, y2; - int nbox; - box_type_t * pbox; - - GOOD (region); - region->extents.x1 = x1 = region->extents.x1 + x; - region->extents.y1 = y1 = region->extents.y1 + y; - region->extents.x2 = x2 = region->extents.x2 + x; - region->extents.y2 = y2 = region->extents.y2 + y; - - if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0) - { - if (region->data && (nbox = region->data->numRects)) - { - for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) - { - pbox->x1 += x; - pbox->y1 += y; - pbox->x2 += x; - pbox->y2 += y; - } - } - return; - } - - if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) - { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - FREE_DATA (region); - region->data = pixman_region_empty_data; - return; - } - - if (x1 < PIXMAN_REGION_MIN) - region->extents.x1 = PIXMAN_REGION_MIN; - else if (x2 > PIXMAN_REGION_MAX) - region->extents.x2 = PIXMAN_REGION_MAX; - - if (y1 < PIXMAN_REGION_MIN) - region->extents.y1 = PIXMAN_REGION_MIN; - else if (y2 > PIXMAN_REGION_MAX) - region->extents.y2 = PIXMAN_REGION_MAX; - - if (region->data && (nbox = region->data->numRects)) - { - box_type_t * pbox_out; - - for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) - { - pbox_out->x1 = x1 = pbox->x1 + x; - pbox_out->y1 = y1 = pbox->y1 + y; - pbox_out->x2 = x2 = pbox->x2 + x; - pbox_out->y2 = y2 = pbox->y2 + y; - - if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | - (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0) - { - region->data->numRects--; - continue; - } - - if (x1 < PIXMAN_REGION_MIN) - pbox_out->x1 = PIXMAN_REGION_MIN; - else if (x2 > PIXMAN_REGION_MAX) - pbox_out->x2 = PIXMAN_REGION_MAX; - - if (y1 < PIXMAN_REGION_MIN) - pbox_out->y1 = PIXMAN_REGION_MIN; - else if (y2 > PIXMAN_REGION_MAX) - pbox_out->y2 = PIXMAN_REGION_MAX; - - pbox_out++; - } - - if (pbox_out != pbox) - { - if (region->data->numRects == 1) - { - region->extents = *PIXREGION_BOXPTR (region); - FREE_DATA (region); - region->data = (region_data_type_t *)NULL; - } - else - { - pixman_set_extents (region); - } - } - } - - GOOD (region); -} - -PIXMAN_EXPORT void -PREFIX (_reset) (region_type_t *region, const box_type_t *box) -{ - GOOD (region); - - critical_if_fail (GOOD_RECT (box)); - - region->extents = *box; - - FREE_DATA (region); - - region->data = NULL; -} - -PIXMAN_EXPORT void -PREFIX (_clear) (region_type_t *region) -{ - GOOD (region); - FREE_DATA (region); - - region->extents = *pixman_region_empty_box; - region->data = pixman_region_empty_data; -} - -/* box is "return" value */ -PIXMAN_EXPORT int -PREFIX (_contains_point) (const region_type_t * region, - int x, int y, - box_type_t * box) -{ - box_type_t *pbox, *pbox_end; - int numRects; - - GOOD (region); - numRects = PIXREGION_NUMRECTS (region); - - if (!numRects || !INBOX (®ion->extents, x, y)) - return(FALSE); - - if (numRects == 1) - { - if (box) - *box = region->extents; - - return(TRUE); - } - - pbox = PIXREGION_BOXPTR (region); - pbox_end = pbox + numRects; - - pbox = find_box_for_y (pbox, pbox_end, y); - - for (;pbox != pbox_end; pbox++) - { - if ((y < pbox->y1) || (x < pbox->x1)) - break; /* missed it */ - - if (x >= pbox->x2) - continue; /* not there yet */ - - if (box) - *box = *pbox; - - return(TRUE); - } - - return(FALSE); -} - -PIXMAN_EXPORT int -PREFIX (_empty) (const region_type_t * region) -{ - GOOD (region); - - return(PIXREGION_NIL (region)); -} - -PIXMAN_EXPORT int -PREFIX (_not_empty) (const region_type_t * region) -{ - GOOD (region); - - return(!PIXREGION_NIL (region)); -} - -PIXMAN_EXPORT box_type_t * -PREFIX (_extents) (const region_type_t * region) -{ - GOOD (region); - - return(box_type_t *)(®ion->extents); -} - -/* - * Clip a list of scanlines to a region. The caller has allocated the - * space. FSorted is non-zero if the scanline origins are in ascending order. - * - * returns the number of new, clipped scanlines. - */ - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_selfcheck) (region_type_t *reg) -{ - int i, numRects; - - if ((reg->extents.x1 > reg->extents.x2) || - (reg->extents.y1 > reg->extents.y2)) - { - return FALSE; - } - - numRects = PIXREGION_NUMRECTS (reg); - if (!numRects) - { - return ((reg->extents.x1 == reg->extents.x2) && - (reg->extents.y1 == reg->extents.y2) && - (reg->data->size || (reg->data == pixman_region_empty_data))); - } - else if (numRects == 1) - { - return (!reg->data); - } - else - { - box_type_t * pbox_p, * pbox_n; - box_type_t box; - - pbox_p = PIXREGION_RECTS (reg); - box = *pbox_p; - box.y2 = pbox_p[numRects - 1].y2; - pbox_n = pbox_p + 1; - - for (i = numRects; --i > 0; pbox_p++, pbox_n++) - { - if ((pbox_n->x1 >= pbox_n->x2) || - (pbox_n->y1 >= pbox_n->y2)) - { - return FALSE; - } - - if (pbox_n->x1 < box.x1) - box.x1 = pbox_n->x1; - - if (pbox_n->x2 > box.x2) - box.x2 = pbox_n->x2; - - if ((pbox_n->y1 < pbox_p->y1) || - ((pbox_n->y1 == pbox_p->y1) && - ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2)))) - { - return FALSE; - } - } - - return ((box.x1 == reg->extents.x1) && - (box.x2 == reg->extents.x2) && - (box.y1 == reg->extents.y1) && - (box.y2 == reg->extents.y2)); - } -} - -PIXMAN_EXPORT pixman_bool_t -PREFIX (_init_rects) (region_type_t *region, - const box_type_t *boxes, int count) -{ - box_type_t *rects; - int displacement; - int i; - - /* if it's 1, then we just want to set the extents, so call - * the existing method. */ - if (count == 1) - { - PREFIX (_init_rect) (region, - boxes[0].x1, - boxes[0].y1, - boxes[0].x2 - boxes[0].x1, - boxes[0].y2 - boxes[0].y1); - return TRUE; - } - - PREFIX (_init) (region); - - /* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is - * a special case, and causing pixman_rect_alloc would cause - * us to leak memory (because the 0-rect case should be the - * static pixman_region_empty_data data). - */ - if (count == 0) - return TRUE; - - if (!pixman_rect_alloc (region, count)) - return FALSE; - - rects = PIXREGION_RECTS (region); - - /* Copy in the rects */ - memcpy (rects, boxes, sizeof(box_type_t) * count); - region->data->numRects = count; - - /* Eliminate empty and malformed rectangles */ - displacement = 0; - - for (i = 0; i < count; ++i) - { - box_type_t *box = &rects[i]; - - if (box->x1 >= box->x2 || box->y1 >= box->y2) - displacement++; - else if (displacement) - rects[i - displacement] = rects[i]; - } - - region->data->numRects -= displacement; - - /* If eliminating empty rectangles caused there - * to be only 0 or 1 rectangles, deal with that. - */ - if (region->data->numRects == 0) - { - FREE_DATA (region); - PREFIX (_init) (region); - - return TRUE; - } - - if (region->data->numRects == 1) - { - region->extents = rects[0]; - - FREE_DATA (region); - region->data = NULL; - - GOOD (region); - - return TRUE; - } - - /* Validate */ - region->extents.x1 = region->extents.x2 = 0; - - return validate (region); -} - -#define READ(_ptr) (*(_ptr)) - -static inline box_type_t * -bitmap_addrect (region_type_t *reg, - box_type_t *r, - box_type_t **first_rect, - int rx1, int ry1, - int rx2, int ry2) -{ - if ((rx1 < rx2) && (ry1 < ry2) && - (!(reg->data->numRects && - ((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) && - ((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2)))) - { - if (reg->data->numRects == reg->data->size) - { - if (!pixman_rect_alloc (reg, 1)) - return NULL; - *first_rect = PIXREGION_BOXPTR(reg); - r = *first_rect + reg->data->numRects; - } - r->x1 = rx1; - r->y1 = ry1; - r->x2 = rx2; - r->y2 = ry2; - reg->data->numRects++; - if (r->x1 < reg->extents.x1) - reg->extents.x1 = r->x1; - if (r->x2 > reg->extents.x2) - reg->extents.x2 = r->x2; - r++; - } - return r; -} - -/* Convert bitmap clip mask into clipping region. - * First, goes through each line and makes boxes by noting the transitions - * from 0 to 1 and 1 to 0. - * Then it coalesces the current line with the previous if they have boxes - * at the same X coordinates. - * Stride is in number of uint32_t per line. - */ -PIXMAN_EXPORT void -PREFIX (_init_from_image) (region_type_t *region, - pixman_image_t *image) -{ - uint32_t mask0 = 0xffffffff & ~SCREEN_SHIFT_RIGHT(0xffffffff, 1); - box_type_t *first_rect, *rects, *prect_line_start; - box_type_t *old_rect, *new_rect; - uint32_t *pw, w, *pw_line, *pw_line_end; - int irect_prev_start, irect_line_start; - int h, base, rx1 = 0, crects; - int ib; - pixman_bool_t in_box, same; - int width, height, stride; - - PREFIX(_init) (region); - - critical_if_fail (region->data); - - return_if_fail (image->type == BITS); - return_if_fail (image->bits.format == PIXMAN_a1); - - pw_line = pixman_image_get_data (image); - width = pixman_image_get_width (image); - height = pixman_image_get_height (image); - stride = pixman_image_get_stride (image) / 4; - - first_rect = PIXREGION_BOXPTR(region); - rects = first_rect; - - region->extents.x1 = width - 1; - region->extents.x2 = 0; - irect_prev_start = -1; - for (h = 0; h < height; h++) - { - pw = pw_line; - pw_line += stride; - irect_line_start = rects - first_rect; - - /* If the Screen left most bit of the word is set, we're starting in - * a box */ - if (READ(pw) & mask0) - { - in_box = TRUE; - rx1 = 0; - } - else - { - in_box = FALSE; - } - - /* Process all words which are fully in the pixmap */ - pw_line_end = pw + (width >> 5); - for (base = 0; pw < pw_line_end; base += 32) - { - w = READ(pw++); - if (in_box) - { - if (!~w) - continue; - } - else - { - if (!w) - continue; - } - for (ib = 0; ib < 32; ib++) - { - /* If the Screen left most bit of the word is set, we're - * starting a box */ - if (w & mask0) - { - if (!in_box) - { - rx1 = base + ib; - /* start new box */ - in_box = TRUE; - } - } - else - { - if (in_box) - { - /* end box */ - rects = bitmap_addrect (region, rects, &first_rect, - rx1, h, base + ib, h + 1); - if (rects == NULL) - goto error; - in_box = FALSE; - } - } - /* Shift the word VISUALLY left one. */ - w = SCREEN_SHIFT_LEFT(w, 1); - } - } - - if (width & 31) - { - /* Process final partial word on line */ - w = READ(pw++); - for (ib = 0; ib < (width & 31); ib++) - { - /* If the Screen left most bit of the word is set, we're - * starting a box */ - if (w & mask0) - { - if (!in_box) - { - rx1 = base + ib; - /* start new box */ - in_box = TRUE; - } - } - else - { - if (in_box) - { - /* end box */ - rects = bitmap_addrect(region, rects, &first_rect, - rx1, h, base + ib, h + 1); - if (rects == NULL) - goto error; - in_box = FALSE; - } - } - /* Shift the word VISUALLY left one. */ - w = SCREEN_SHIFT_LEFT(w, 1); - } - } - /* If scanline ended with last bit set, end the box */ - if (in_box) - { - rects = bitmap_addrect(region, rects, &first_rect, - rx1, h, base + (width & 31), h + 1); - if (rects == NULL) - goto error; - } - /* if all rectangles on this line have the same x-coords as - * those on the previous line, then add 1 to all the previous y2s and - * throw away all the rectangles from this line - */ - same = FALSE; - if (irect_prev_start != -1) - { - crects = irect_line_start - irect_prev_start; - if (crects != 0 && - crects == ((rects - first_rect) - irect_line_start)) - { - old_rect = first_rect + irect_prev_start; - new_rect = prect_line_start = first_rect + irect_line_start; - same = TRUE; - while (old_rect < prect_line_start) - { - if ((old_rect->x1 != new_rect->x1) || - (old_rect->x2 != new_rect->x2)) - { - same = FALSE; - break; - } - old_rect++; - new_rect++; - } - if (same) - { - old_rect = first_rect + irect_prev_start; - while (old_rect < prect_line_start) - { - old_rect->y2 += 1; - old_rect++; - } - rects -= crects; - region->data->numRects -= crects; - } - } - } - if(!same) - irect_prev_start = irect_line_start; - } - if (!region->data->numRects) - { - region->extents.x1 = region->extents.x2 = 0; - } - else - { - region->extents.y1 = PIXREGION_BOXPTR(region)->y1; - region->extents.y2 = PIXREGION_END(region)->y2; - if (region->data->numRects == 1) - { - free (region->data); - region->data = NULL; - } - } - - error: - return; -} diff --git a/vendor/pixman/pixman/pixman-region16.c b/vendor/pixman/pixman/pixman-region16.c deleted file mode 100644 index da4719e7a..000000000 --- a/vendor/pixman/pixman/pixman-region16.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright © 2008 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without - * fee, provided that the above copyright notice appear in all copies - * and that both that copyright notice and this permission notice - * appear in supporting documentation, and that the name of - * Red Hat, Inc. not be used in advertising or publicity pertaining to - * distribution of the software without specific, written prior - * permission. Red Hat, Inc. makes no representations about the - * suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. - * - * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL, - * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER - * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR - * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Soren Sandmann - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#undef PIXMAN_DISABLE_DEPRECATED - -#include "pixman-private.h" - -#include - -typedef pixman_box16_t box_type_t; -typedef pixman_region16_data_t region_data_type_t; -typedef pixman_region16_t region_type_t; -typedef int32_t overflow_int_t; - -typedef struct { - int x, y; -} point_type_t; - -#define PREFIX(x) pixman_region##x - -#define PIXMAN_REGION_MAX INT16_MAX -#define PIXMAN_REGION_MIN INT16_MIN - -#include "pixman-region.c" - -/* This function exists only to make it possible to preserve the X ABI - - * it should go away at first opportunity. - * - * The problem is that the X ABI exports the three structs and has used - * them through macros. So the X server calls this function with - * the addresses of those structs which makes the existing code continue to - * work. - */ -PIXMAN_EXPORT void -pixman_region_set_static_pointers (pixman_box16_t *empty_box, - pixman_region16_data_t *empty_data, - pixman_region16_data_t *broken_data) -{ - pixman_region_empty_box = empty_box; - pixman_region_empty_data = empty_data; - pixman_broken_data = broken_data; -} diff --git a/vendor/pixman/pixman/pixman-region32.c b/vendor/pixman/pixman/pixman-region32.c deleted file mode 100644 index 68b456bf3..000000000 --- a/vendor/pixman/pixman/pixman-region32.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright © 2008 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without - * fee, provided that the above copyright notice appear in all copies - * and that both that copyright notice and this permission notice - * appear in supporting documentation, and that the name of - * Red Hat, Inc. not be used in advertising or publicity pertaining to - * distribution of the software without specific, written prior - * permission. Red Hat, Inc. makes no representations about the - * suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. - * - * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL, - * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER - * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR - * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Soren Sandmann - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -#include - -typedef pixman_box32_t box_type_t; -typedef pixman_region32_data_t region_data_type_t; -typedef pixman_region32_t region_type_t; -typedef int64_t overflow_int_t; - -typedef struct { - int x, y; -} point_type_t; - -#define PREFIX(x) pixman_region32##x - -#define PIXMAN_REGION_MAX INT32_MAX -#define PIXMAN_REGION_MIN INT32_MIN - -#include "pixman-region.c" diff --git a/vendor/pixman/pixman/pixman-solid-fill.c b/vendor/pixman/pixman/pixman-solid-fill.c deleted file mode 100644 index 44f4de07a..000000000 --- a/vendor/pixman/pixman/pixman-solid-fill.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007, 2009 Red Hat, Inc. - * Copyright © 2009 Soren Sandmann - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" - -static uint32_t -color_to_uint32 (const pixman_color_t *color) -{ - return - ((unsigned int) color->alpha >> 8 << 24) | - ((unsigned int) color->red >> 8 << 16) | - ((unsigned int) color->green & 0xff00) | - ((unsigned int) color->blue >> 8); -} - -static argb_t -color_to_float (const pixman_color_t *color) -{ - argb_t result; - - result.a = pixman_unorm_to_float (color->alpha, 16); - result.r = pixman_unorm_to_float (color->red, 16); - result.g = pixman_unorm_to_float (color->green, 16); - result.b = pixman_unorm_to_float (color->blue, 16); - - return result; -} - -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_solid_fill (const pixman_color_t *color) -{ - pixman_image_t *img = _pixman_image_allocate (); - - if (!img) - return NULL; - - img->type = SOLID; - img->solid.color = *color; - img->solid.color_32 = color_to_uint32 (color); - img->solid.color_float = color_to_float (color); - - return img; -} - diff --git a/vendor/pixman/pixman/pixman-sse2.c b/vendor/pixman/pixman/pixman-sse2.c deleted file mode 100644 index 60825375f..000000000 --- a/vendor/pixman/pixman/pixman-sse2.c +++ /dev/null @@ -1,6528 +0,0 @@ -/* - * Copyright © 2008 Rodrigo Kumpera - * Copyright © 2008 André Tupinambá - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Rodrigo Kumpera (kumpera@gmail.com) - * André Tupinambá (andrelrt@gmail.com) - * - * Based on work by Owen Taylor and Søren Sandmann - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */ -#define PSHUFD_IS_FAST 0 - -#include /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ -#include /* for SSE2 intrinsics */ -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" - -static __m128i mask_0080; -static __m128i mask_00ff; -static __m128i mask_0101; -static __m128i mask_ffff; -static __m128i mask_ff000000; -static __m128i mask_alpha; - -static __m128i mask_565_r; -static __m128i mask_565_g1, mask_565_g2; -static __m128i mask_565_b; -static __m128i mask_red; -static __m128i mask_green; -static __m128i mask_blue; - -static __m128i mask_565_fix_rb; -static __m128i mask_565_fix_g; - -static __m128i mask_565_rb; -static __m128i mask_565_pack_multiplier; - -static force_inline __m128i -unpack_32_1x128 (uint32_t data) -{ - return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ()); -} - -static force_inline void -unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi) -{ - *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); - *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); -} - -static force_inline __m128i -unpack_565_to_8888 (__m128i lo) -{ - __m128i r, g, b, rb, t; - - r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red); - g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green); - b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue); - - rb = _mm_or_si128 (r, b); - t = _mm_and_si128 (rb, mask_565_fix_rb); - t = _mm_srli_epi32 (t, 5); - rb = _mm_or_si128 (rb, t); - - t = _mm_and_si128 (g, mask_565_fix_g); - t = _mm_srli_epi32 (t, 6); - g = _mm_or_si128 (g, t); - - return _mm_or_si128 (rb, g); -} - -static force_inline void -unpack_565_128_4x128 (__m128i data, - __m128i* data0, - __m128i* data1, - __m128i* data2, - __m128i* data3) -{ - __m128i lo, hi; - - lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); - hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); - - lo = unpack_565_to_8888 (lo); - hi = unpack_565_to_8888 (hi); - - unpack_128_2x128 (lo, data0, data1); - unpack_128_2x128 (hi, data2, data3); -} - -static force_inline uint16_t -pack_565_32_16 (uint32_t pixel) -{ - return (uint16_t) (((pixel >> 8) & 0xf800) | - ((pixel >> 5) & 0x07e0) | - ((pixel >> 3) & 0x001f)); -} - -static force_inline __m128i -pack_2x128_128 (__m128i lo, __m128i hi) -{ - return _mm_packus_epi16 (lo, hi); -} - -static force_inline __m128i -pack_565_2packedx128_128 (__m128i lo, __m128i hi) -{ - __m128i rb0 = _mm_and_si128 (lo, mask_565_rb); - __m128i rb1 = _mm_and_si128 (hi, mask_565_rb); - - __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier); - __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier); - - __m128i g0 = _mm_and_si128 (lo, mask_green); - __m128i g1 = _mm_and_si128 (hi, mask_green); - - t0 = _mm_or_si128 (t0, g0); - t1 = _mm_or_si128 (t1, g1); - - /* Simulates _mm_packus_epi32 */ - t0 = _mm_slli_epi32 (t0, 16 - 5); - t1 = _mm_slli_epi32 (t1, 16 - 5); - t0 = _mm_srai_epi32 (t0, 16); - t1 = _mm_srai_epi32 (t1, 16); - return _mm_packs_epi32 (t0, t1); -} - -static force_inline __m128i -pack_565_2x128_128 (__m128i lo, __m128i hi) -{ - __m128i data; - __m128i r, g1, g2, b; - - data = pack_2x128_128 (lo, hi); - - r = _mm_and_si128 (data, mask_565_r); - g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1); - g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2); - b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b); - - return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b); -} - -static force_inline __m128i -pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) -{ - return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), - pack_565_2x128_128 (*xmm2, *xmm3)); -} - -static force_inline int -is_opaque (__m128i x) -{ - __m128i ffs = _mm_cmpeq_epi8 (x, x); - - return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888; -} - -static force_inline int -is_zero (__m128i x) -{ - return _mm_movemask_epi8 ( - _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff; -} - -static force_inline int -is_transparent (__m128i x) -{ - return (_mm_movemask_epi8 ( - _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888; -} - -static force_inline __m128i -expand_pixel_32_1x128 (uint32_t data) -{ - return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0)); -} - -static force_inline __m128i -expand_alpha_1x128 (__m128i data) -{ - return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, - _MM_SHUFFLE (3, 3, 3, 3)), - _MM_SHUFFLE (3, 3, 3, 3)); -} - -static force_inline void -expand_alpha_2x128 (__m128i data_lo, - __m128i data_hi, - __m128i* alpha_lo, - __m128i* alpha_hi) -{ - __m128i lo, hi; - - lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3)); - hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3)); - - *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3)); - *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3)); -} - -static force_inline void -expand_alpha_rev_2x128 (__m128i data_lo, - __m128i data_hi, - __m128i* alpha_lo, - __m128i* alpha_hi) -{ - __m128i lo, hi; - - lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0)); - hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0)); - *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0)); - *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0)); -} - -static force_inline void -pix_multiply_2x128 (__m128i* data_lo, - __m128i* data_hi, - __m128i* alpha_lo, - __m128i* alpha_hi, - __m128i* ret_lo, - __m128i* ret_hi) -{ - __m128i lo, hi; - - lo = _mm_mullo_epi16 (*data_lo, *alpha_lo); - hi = _mm_mullo_epi16 (*data_hi, *alpha_hi); - lo = _mm_adds_epu16 (lo, mask_0080); - hi = _mm_adds_epu16 (hi, mask_0080); - *ret_lo = _mm_mulhi_epu16 (lo, mask_0101); - *ret_hi = _mm_mulhi_epu16 (hi, mask_0101); -} - -static force_inline void -pix_add_multiply_2x128 (__m128i* src_lo, - __m128i* src_hi, - __m128i* alpha_dst_lo, - __m128i* alpha_dst_hi, - __m128i* dst_lo, - __m128i* dst_hi, - __m128i* alpha_src_lo, - __m128i* alpha_src_hi, - __m128i* ret_lo, - __m128i* ret_hi) -{ - __m128i t1_lo, t1_hi; - __m128i t2_lo, t2_hi; - - pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi); - pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi); - - *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo); - *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi); -} - -static force_inline void -negate_2x128 (__m128i data_lo, - __m128i data_hi, - __m128i* neg_lo, - __m128i* neg_hi) -{ - *neg_lo = _mm_xor_si128 (data_lo, mask_00ff); - *neg_hi = _mm_xor_si128 (data_hi, mask_00ff); -} - -static force_inline void -invert_colors_2x128 (__m128i data_lo, - __m128i data_hi, - __m128i* inv_lo, - __m128i* inv_hi) -{ - __m128i lo, hi; - - lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2)); - hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2)); - *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2)); - *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2)); -} - -static force_inline void -over_2x128 (__m128i* src_lo, - __m128i* src_hi, - __m128i* alpha_lo, - __m128i* alpha_hi, - __m128i* dst_lo, - __m128i* dst_hi) -{ - __m128i t1, t2; - - negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); - - pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); - - *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo); - *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi); -} - -static force_inline void -over_rev_non_pre_2x128 (__m128i src_lo, - __m128i src_hi, - __m128i* dst_lo, - __m128i* dst_hi) -{ - __m128i lo, hi; - __m128i alpha_lo, alpha_hi; - - expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi); - - lo = _mm_or_si128 (alpha_lo, mask_alpha); - hi = _mm_or_si128 (alpha_hi, mask_alpha); - - invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi); - - pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi); - - over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi); -} - -static force_inline void -in_over_2x128 (__m128i* src_lo, - __m128i* src_hi, - __m128i* alpha_lo, - __m128i* alpha_hi, - __m128i* mask_lo, - __m128i* mask_hi, - __m128i* dst_lo, - __m128i* dst_hi) -{ - __m128i s_lo, s_hi; - __m128i a_lo, a_hi; - - pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); - pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); - - over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); -} - -/* load 4 pixels from a 16-byte boundary aligned address */ -static force_inline __m128i -load_128_aligned (__m128i* src) -{ - return _mm_load_si128 (src); -} - -/* load 4 pixels from a unaligned address */ -static force_inline __m128i -load_128_unaligned (const __m128i* src) -{ - return _mm_loadu_si128 (src); -} - -/* save 4 pixels using Write Combining memory on a 16-byte - * boundary aligned address - */ -static force_inline void -save_128_write_combining (__m128i* dst, - __m128i data) -{ - _mm_stream_si128 (dst, data); -} - -/* save 4 pixels on a 16-byte boundary aligned address */ -static force_inline void -save_128_aligned (__m128i* dst, - __m128i data) -{ - _mm_store_si128 (dst, data); -} - -/* save 4 pixels on a unaligned address */ -static force_inline void -save_128_unaligned (__m128i* dst, - __m128i data) -{ - _mm_storeu_si128 (dst, data); -} - -static force_inline __m128i -load_32_1x128 (uint32_t data) -{ - return _mm_cvtsi32_si128 (data); -} - -static force_inline __m128i -expand_alpha_rev_1x128 (__m128i data) -{ - return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0)); -} - -static force_inline __m128i -expand_pixel_8_1x128 (uint8_t data) -{ - return _mm_shufflelo_epi16 ( - unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0)); -} - -static force_inline __m128i -pix_multiply_1x128 (__m128i data, - __m128i alpha) -{ - return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha), - mask_0080), - mask_0101); -} - -static force_inline __m128i -pix_add_multiply_1x128 (__m128i* src, - __m128i* alpha_dst, - __m128i* dst, - __m128i* alpha_src) -{ - __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst); - __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src); - - return _mm_adds_epu8 (t1, t2); -} - -static force_inline __m128i -negate_1x128 (__m128i data) -{ - return _mm_xor_si128 (data, mask_00ff); -} - -static force_inline __m128i -invert_colors_1x128 (__m128i data) -{ - return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2)); -} - -static force_inline __m128i -over_1x128 (__m128i src, __m128i alpha, __m128i dst) -{ - return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha))); -} - -static force_inline __m128i -in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst) -{ - return over_1x128 (pix_multiply_1x128 (*src, *mask), - pix_multiply_1x128 (*alpha, *mask), - *dst); -} - -static force_inline __m128i -over_rev_non_pre_1x128 (__m128i src, __m128i dst) -{ - __m128i alpha = expand_alpha_1x128 (src); - - return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src), - _mm_or_si128 (alpha, mask_alpha)), - alpha, - dst); -} - -static force_inline uint32_t -pack_1x128_32 (__m128i data) -{ - return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ())); -} - -static force_inline __m128i -expand565_16_1x128 (uint16_t pixel) -{ - __m128i m = _mm_cvtsi32_si128 (pixel); - - m = unpack_565_to_8888 (m); - - return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ()); -} - -static force_inline uint32_t -core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) -{ - uint8_t a; - __m128i xmms; - - a = src >> 24; - - if (a == 0xff) - { - return src; - } - else if (src) - { - xmms = unpack_32_1x128 (src); - return pack_1x128_32 ( - over_1x128 (xmms, expand_alpha_1x128 (xmms), - unpack_32_1x128 (dst))); - } - - return dst; -} - -static force_inline uint32_t -combine1 (const uint32_t *ps, const uint32_t *pm) -{ - uint32_t s; - memcpy(&s, ps, sizeof(uint32_t)); - - if (pm) - { - __m128i ms, mm; - - mm = unpack_32_1x128 (*pm); - mm = expand_alpha_1x128 (mm); - - ms = unpack_32_1x128 (s); - ms = pix_multiply_1x128 (ms, mm); - - s = pack_1x128_32 (ms); - } - - return s; -} - -static force_inline __m128i -combine4 (const __m128i *ps, const __m128i *pm) -{ - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_msk_lo, xmm_msk_hi; - __m128i s; - - if (pm) - { - xmm_msk_lo = load_128_unaligned (pm); - - if (is_transparent (xmm_msk_lo)) - return _mm_setzero_si128 (); - } - - s = load_128_unaligned (ps); - - if (pm) - { - unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi); - - expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_msk_lo, &xmm_msk_hi, - &xmm_src_lo, &xmm_src_hi); - - s = pack_2x128_128 (xmm_src_lo, xmm_src_hi); - } - - return s; -} - -static force_inline void -core_combine_over_u_sse2_mask (uint32_t * pd, - const uint32_t* ps, - const uint32_t* pm, - int w) -{ - uint32_t s, d; - - /* Align dst on a 16-byte boundary */ - while (w && ((uintptr_t)pd & 15)) - { - d = *pd; - s = combine1 (ps, pm); - - if (s) - *pd = core_combine_over_u_pixel_sse2 (s, d); - pd++; - ps++; - pm++; - w--; - } - - while (w >= 4) - { - __m128i mask = load_128_unaligned ((__m128i *)pm); - - if (!is_zero (mask)) - { - __m128i src; - __m128i src_hi, src_lo; - __m128i mask_hi, mask_lo; - __m128i alpha_hi, alpha_lo; - - src = load_128_unaligned ((__m128i *)ps); - - if (is_opaque (_mm_and_si128 (src, mask))) - { - save_128_aligned ((__m128i *)pd, src); - } - else - { - __m128i dst = load_128_aligned ((__m128i *)pd); - __m128i dst_hi, dst_lo; - - unpack_128_2x128 (mask, &mask_lo, &mask_hi); - unpack_128_2x128 (src, &src_lo, &src_hi); - - expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi); - pix_multiply_2x128 (&src_lo, &src_hi, - &mask_lo, &mask_hi, - &src_lo, &src_hi); - - unpack_128_2x128 (dst, &dst_lo, &dst_hi); - - expand_alpha_2x128 (src_lo, src_hi, - &alpha_lo, &alpha_hi); - - over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, - &dst_lo, &dst_hi); - - save_128_aligned ( - (__m128i *)pd, - pack_2x128_128 (dst_lo, dst_hi)); - } - } - - pm += 4; - ps += 4; - pd += 4; - w -= 4; - } - while (w) - { - d = *pd; - s = combine1 (ps, pm); - - if (s) - *pd = core_combine_over_u_pixel_sse2 (s, d); - pd++; - ps++; - pm++; - - w--; - } -} - -static force_inline void -core_combine_over_u_sse2_no_mask (uint32_t * pd, - const uint32_t* ps, - int w) -{ - uint32_t s, d; - - /* Align dst on a 16-byte boundary */ - while (w && ((uintptr_t)pd & 15)) - { - d = *pd; - s = *ps; - - if (s) - *pd = core_combine_over_u_pixel_sse2 (s, d); - pd++; - ps++; - w--; - } - - while (w >= 4) - { - __m128i src; - __m128i src_hi, src_lo, dst_hi, dst_lo; - __m128i alpha_hi, alpha_lo; - - src = load_128_unaligned ((__m128i *)ps); - - if (!is_zero (src)) - { - if (is_opaque (src)) - { - save_128_aligned ((__m128i *)pd, src); - } - else - { - __m128i dst = load_128_aligned ((__m128i *)pd); - - unpack_128_2x128 (src, &src_lo, &src_hi); - unpack_128_2x128 (dst, &dst_lo, &dst_hi); - - expand_alpha_2x128 (src_lo, src_hi, - &alpha_lo, &alpha_hi); - over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, - &dst_lo, &dst_hi); - - save_128_aligned ( - (__m128i *)pd, - pack_2x128_128 (dst_lo, dst_hi)); - } - } - - ps += 4; - pd += 4; - w -= 4; - } - while (w) - { - d = *pd; - s = *ps; - - if (s) - *pd = core_combine_over_u_pixel_sse2 (s, d); - pd++; - ps++; - - w--; - } -} - -static force_inline void -sse2_combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - if (pm) - core_combine_over_u_sse2_mask (pd, ps, pm, w); - else - core_combine_over_u_sse2_no_mask (pd, ps, w); -} - -static void -sse2_combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_alpha_lo, xmm_alpha_hi; - - /* Align dst on a 16-byte boundary */ - while (w && - ((uintptr_t)pd & 15)) - { - d = *pd; - s = combine1 (ps, pm); - - *pd++ = core_combine_over_u_pixel_sse2 (d, s); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - /* I'm loading unaligned because I'm not sure - * about the address alignment. - */ - xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - over_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_src_lo, &xmm_src_hi); - - /* rebuid the 4 pixel data and save*/ - save_128_aligned ((__m128i*)pd, - pack_2x128_128 (xmm_src_lo, xmm_src_hi)); - - w -= 4; - ps += 4; - pd += 4; - - if (pm) - pm += 4; - } - - while (w) - { - d = *pd; - s = combine1 (ps, pm); - - *pd++ = core_combine_over_u_pixel_sse2 (d, s); - ps++; - w--; - if (pm) - pm++; - } -} - -static force_inline uint32_t -core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst) -{ - uint32_t maska = src >> 24; - - if (maska == 0) - { - return 0; - } - else if (maska != 0xff) - { - return pack_1x128_32 ( - pix_multiply_1x128 (unpack_32_1x128 (dst), - expand_alpha_1x128 (unpack_32_1x128 (src)))); - } - - return dst; -} - -static void -sse2_combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - while (w && ((uintptr_t)pd & 15)) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_in_u_pixel_sse2 (d, s); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i*)pd, - pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_in_u_pixel_sse2 (d, s); - w--; - ps++; - if (pm) - pm++; - } -} - -static void -sse2_combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - while (w && ((uintptr_t)pd & 15)) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_in_u_pixel_sse2 (s, d); - ps++; - w--; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_src_lo, &xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_in_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } -} - -static void -sse2_combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - while (w && ((uintptr_t)pd & 15)) - { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), negate_1x128 ( - expand_alpha_1x128 (unpack_32_1x128 (s))))); - - if (pm) - pm++; - ps++; - w--; - } - - while (w >= 4) - { - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_src_lo, &xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - if (pm) - pm += 4; - - w -= 4; - } - - while (w) - { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), negate_1x128 ( - expand_alpha_1x128 (unpack_32_1x128 (s))))); - ps++; - if (pm) - pm++; - w--; - } -} - -static void -sse2_combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - while (w && ((uintptr_t)pd & 15)) - { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), negate_1x128 ( - expand_alpha_1x128 (unpack_32_1x128 (d))))); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), negate_1x128 ( - expand_alpha_1x128 (unpack_32_1x128 (d))))); - w--; - ps++; - if (pm) - pm++; - } -} - -static force_inline uint32_t -core_combine_atop_u_pixel_sse2 (uint32_t src, - uint32_t dst) -{ - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - - __m128i sa = negate_1x128 (expand_alpha_1x128 (s)); - __m128i da = expand_alpha_1x128 (d); - - return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); -} - -static void -sse2_combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - - while (w && ((uintptr_t)pd & 15)) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_atop_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - - pix_add_multiply_2x128 ( - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_atop_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } -} - -static force_inline uint32_t -core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, - uint32_t dst) -{ - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - - __m128i sa = expand_alpha_1x128 (s); - __m128i da = negate_1x128 (expand_alpha_1x128 (d)); - - return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); -} - -static void -sse2_combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - - while (w && ((uintptr_t)pd & 15)) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); - ps++; - w--; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_add_multiply_2x128 ( - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); - ps++; - w--; - if (pm) - pm++; - } -} - -static force_inline uint32_t -core_combine_xor_u_pixel_sse2 (uint32_t src, - uint32_t dst) -{ - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - - __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d)); - __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s)); - - return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s)); -} - -static void -sse2_combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dst, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int w = width; - uint32_t s, d; - uint32_t* pd = dst; - const uint32_t* ps = src; - const uint32_t* pm = mask; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - - while (w && ((uintptr_t)pd & 15)) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_xor_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); - xmm_dst = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_add_multiply_2x128 ( - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - w -= 4; - if (pm) - pm += 4; - } - - while (w) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_xor_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } -} - -static force_inline void -sse2_combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dst, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int w = width; - uint32_t s, d; - uint32_t* pd = dst; - const uint32_t* ps = src; - const uint32_t* pm = mask; - - while (w && (uintptr_t)pd & 15) - { - s = combine1 (ps, pm); - d = *pd; - - ps++; - if (pm) - pm++; - *pd++ = _mm_cvtsi128_si32 ( - _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); - w--; - } - - while (w >= 4) - { - __m128i s; - - s = combine4 ((__m128i*)ps, (__m128i*)pm); - - save_128_aligned ( - (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd))); - - pd += 4; - ps += 4; - if (pm) - pm += 4; - w -= 4; - } - - while (w--) - { - s = combine1 (ps, pm); - d = *pd; - - ps++; - *pd++ = _mm_cvtsi128_si32 ( - _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); - if (pm) - pm++; - } -} - -static force_inline uint32_t -core_combine_saturate_u_pixel_sse2 (uint32_t src, - uint32_t dst) -{ - __m128i ms = unpack_32_1x128 (src); - __m128i md = unpack_32_1x128 (dst); - uint32_t sa = src >> 24; - uint32_t da = ~dst >> 24; - - if (sa > da) - { - ms = pix_multiply_1x128 ( - ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24))); - } - - return pack_1x128_32 (_mm_adds_epu16 (md, ms)); -} - -static void -sse2_combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, d; - - uint32_t pack_cmp; - __m128i xmm_src, xmm_dst; - - while (w && (uintptr_t)pd & 15) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - w--; - ps++; - if (pm) - pm++; - } - - while (w >= 4) - { - xmm_dst = load_128_aligned ((__m128i*)pd); - xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); - - pack_cmp = _mm_movemask_epi8 ( - _mm_cmpgt_epi32 ( - _mm_srli_epi32 (xmm_src, 24), - _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24))); - - /* if some alpha src is grater than respective ~alpha dst */ - if (pack_cmp) - { - s = combine1 (ps++, pm); - d = *pd; - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - if (pm) - pm++; - - s = combine1 (ps++, pm); - d = *pd; - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - if (pm) - pm++; - - s = combine1 (ps++, pm); - d = *pd; - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - if (pm) - pm++; - - s = combine1 (ps++, pm); - d = *pd; - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - if (pm) - pm++; - } - else - { - save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src)); - - pd += 4; - ps += 4; - if (pm) - pm += 4; - } - - w -= 4; - } - - while (w--) - { - s = combine1 (ps, pm); - d = *pd; - - *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); - ps++; - if (pm) - pm++; - } -} - -static void -sse2_combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); - w--; - } - - while (w >= 4) - { - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); - w--; - } -} - -static force_inline uint32_t -core_combine_over_ca_pixel_sse2 (uint32_t src, - uint32_t mask, - uint32_t dst) -{ - __m128i s = unpack_32_1x128 (src); - __m128i expAlpha = expand_alpha_1x128 (s); - __m128i unpk_mask = unpack_32_1x128 (mask); - __m128i unpk_dst = unpack_32_1x128 (dst); - - return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst)); -} - -static void -sse2_combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); - w--; - } -} - -static force_inline uint32_t -core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, - uint32_t mask, - uint32_t dst) -{ - __m128i d = unpack_32_1x128 (dst); - - return pack_1x128_32 ( - over_1x128 (d, expand_alpha_1x128 (d), - pix_multiply_1x128 (unpack_32_1x128 (src), - unpack_32_1x128 (mask)))); -} - -static void -sse2_combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - over_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask_lo, &xmm_mask_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); - w--; - } -} - -static void -sse2_combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)), - expand_alpha_1x128 (unpack_32_1x128 (d)))); - - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (m)), - expand_alpha_1x128 (unpack_32_1x128 (d)))); - - w--; - } -} - -static void -sse2_combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), - pix_multiply_1x128 (unpack_32_1x128 (m), - expand_alpha_1x128 (unpack_32_1x128 (s))))); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), - pix_multiply_1x128 (unpack_32_1x128 (m), - expand_alpha_1x128 (unpack_32_1x128 (s))))); - w--; - } -} - -static void -sse2_combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (m)), - negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (m)), - negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); - - w--; - } -} - -static void -sse2_combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), - negate_1x128 (pix_multiply_1x128 ( - unpack_32_1x128 (m), - expand_alpha_1x128 (unpack_32_1x128 (s)))))); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask_lo, &xmm_mask_hi); - - negate_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (d), - negate_1x128 (pix_multiply_1x128 ( - unpack_32_1x128 (m), - expand_alpha_1x128 (unpack_32_1x128 (s)))))); - w--; - } -} - -static force_inline uint32_t -core_combine_atop_ca_pixel_sse2 (uint32_t src, - uint32_t mask, - uint32_t dst) -{ - __m128i m = unpack_32_1x128 (mask); - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - __m128i sa = expand_alpha_1x128 (s); - __m128i da = expand_alpha_1x128 (d); - - s = pix_multiply_1x128 (s, m); - m = negate_1x128 (pix_multiply_1x128 (m, sa)); - - return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); -} - -static void -sse2_combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi); - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_mask_lo, &xmm_mask_hi); - - negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - pix_add_multiply_2x128 ( - &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); - w--; - } -} - -static force_inline uint32_t -core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, - uint32_t mask, - uint32_t dst) -{ - __m128i m = unpack_32_1x128 (mask); - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - - __m128i da = negate_1x128 (expand_alpha_1x128 (d)); - __m128i sa = expand_alpha_1x128 (s); - - s = pix_multiply_1x128 (s, m); - m = pix_multiply_1x128 (m, sa); - - return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); -} - -static void -sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi); - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_mask_lo, &xmm_mask_hi); - - negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_add_multiply_2x128 ( - &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); - w--; - } -} - -static force_inline uint32_t -core_combine_xor_ca_pixel_sse2 (uint32_t src, - uint32_t mask, - uint32_t dst) -{ - __m128i a = unpack_32_1x128 (mask); - __m128i s = unpack_32_1x128 (src); - __m128i d = unpack_32_1x128 (dst); - - __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 ( - a, expand_alpha_1x128 (s))); - __m128i dest = pix_multiply_1x128 (s, a); - __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d)); - - return pack_1x128_32 (pix_add_multiply_1x128 (&d, - &alpha_dst, - &dest, - &alpha_src)); -} - -static void -sse2_combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; - __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); - w--; - } - - while (w >= 4) - { - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi); - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_alpha_src_lo, &xmm_alpha_src_hi, - &xmm_mask_lo, &xmm_mask_hi); - - negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, - &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - negate_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - pix_add_multiply_2x128 ( - &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); - w--; - } -} - -static void -sse2_combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * pd, - const uint32_t * ps, - const uint32_t * pm, - int w) -{ - uint32_t s, m, d; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask_lo, xmm_mask_hi; - - while (w && (uintptr_t)pd & 15) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), - unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - w--; - } - - while (w >= 4) - { - xmm_src_hi = load_128_unaligned ((__m128i*)ps); - xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - xmm_dst_hi = load_128_aligned ((__m128i*)pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_src_lo, &xmm_src_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 ( - _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), - _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), - unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - w--; - } -} - -static force_inline __m128i -create_mask_16_128 (uint16_t mask) -{ - return _mm_set1_epi16 (mask); -} - -/* Work around a code generation bug in Sun Studio 12. */ -#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) -# define create_mask_2x32_128(mask0, mask1) \ - (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) -#else -static force_inline __m128i -create_mask_2x32_128 (uint32_t mask0, - uint32_t mask1) -{ - return _mm_set_epi32 (mask0, mask1, mask0, mask1); -} -#endif - -static void -sse2_composite_over_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, *dst, d; - int32_t w; - int dst_stride; - __m128i xmm_src, xmm_alpha; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - xmm_src = expand_pixel_32_1x128 (src); - xmm_alpha = expand_alpha_1x128 (xmm_src); - - while (height--) - { - dst = dst_line; - - dst_line += dst_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - d = *dst; - *dst++ = pack_1x128_32 (over_1x128 (xmm_src, - xmm_alpha, - unpack_32_1x128 (d))); - w--; - } - - while (w >= 4) - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_dst_lo, &xmm_dst_hi); - - /* rebuid the 4 pixel data and save*/ - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - w -= 4; - dst += 4; - } - - while (w) - { - d = *dst; - *dst++ = pack_1x128_32 (over_1x128 (xmm_src, - xmm_alpha, - unpack_32_1x128 (d))); - w--; - } - - } -} - -static void -sse2_composite_over_n_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint16_t *dst_line, *dst, d; - int32_t w; - int dst_stride; - __m128i xmm_src, xmm_alpha; - __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - xmm_src = expand_pixel_32_1x128 (src); - xmm_alpha = expand_alpha_1x128 (xmm_src); - - while (height--) - { - dst = dst_line; - - dst_line += dst_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - d = *dst; - - *dst++ = pack_565_32_16 ( - pack_1x128_32 (over_1x128 (xmm_src, - xmm_alpha, - expand565_16_1x128 (d)))); - w--; - } - - while (w >= 8) - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_565_128_4x128 (xmm_dst, - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - - over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_dst0, &xmm_dst1); - over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_dst2, &xmm_dst3); - - xmm_dst = pack_565_4x128_128 ( - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - - save_128_aligned ((__m128i*)dst, xmm_dst); - - dst += 8; - w -= 8; - } - - while (w--) - { - d = *dst; - *dst++ = pack_565_32_16 ( - pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha, - expand565_16_1x128 (d)))); - } - } - -} - -static void -sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, d; - uint32_t *mask_line, m; - uint32_t pack_cmp; - int dst_stride, mask_stride; - - __m128i xmm_src; - __m128i xmm_dst; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - __m128i mmx_src, mmx_mask, mmx_dest; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - xmm_src = _mm_unpacklo_epi8 ( - create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); - mmx_src = xmm_src; - - while (height--) - { - int w = width; - const uint32_t *pm = (uint32_t *)mask_line; - uint32_t *pd = (uint32_t *)dst_line; - - dst_line += dst_stride; - mask_line += mask_stride; - - while (w && (uintptr_t)pd & 15) - { - m = *pm++; - - if (m) - { - d = *pd; - - mmx_mask = unpack_32_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *pd = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), - mmx_dest)); - } - - pd++; - w--; - } - - while (w >= 4) - { - xmm_mask = load_128_unaligned ((__m128i*)pm); - - pack_cmp = - _mm_movemask_epi8 ( - _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); - - /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ - if (pack_cmp != 0xffff) - { - xmm_dst = load_128_aligned ((__m128i*)pd); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_src, &xmm_src, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); - - save_128_aligned ( - (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); - } - - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - m = *pm++; - - if (m) - { - d = *pd; - - mmx_mask = unpack_32_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *pd = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), - mmx_dest)); - } - - pd++; - w--; - } - } - -} - -static void -sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, d; - uint32_t *mask_line, m; - uint32_t pack_cmp; - int dst_stride, mask_stride; - - __m128i xmm_src, xmm_alpha; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - xmm_src = _mm_unpacklo_epi8 ( - create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); - xmm_alpha = expand_alpha_1x128 (xmm_src); - mmx_src = xmm_src; - mmx_alpha = xmm_alpha; - - while (height--) - { - int w = width; - const uint32_t *pm = (uint32_t *)mask_line; - uint32_t *pd = (uint32_t *)dst_line; - - dst_line += dst_stride; - mask_line += mask_stride; - - while (w && (uintptr_t)pd & 15) - { - m = *pm++; - - if (m) - { - d = *pd; - mmx_mask = unpack_32_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *pd = pack_1x128_32 (in_over_1x128 (&mmx_src, - &mmx_alpha, - &mmx_mask, - &mmx_dest)); - } - - pd++; - w--; - } - - while (w >= 4) - { - xmm_mask = load_128_unaligned ((__m128i*)pm); - - pack_cmp = - _mm_movemask_epi8 ( - _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); - - /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ - if (pack_cmp != 0xffff) - { - xmm_dst = load_128_aligned ((__m128i*)pd); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - m = *pm++; - - if (m) - { - d = *pd; - mmx_mask = unpack_32_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *pd = pack_1x128_32 ( - in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); - } - - pd++; - w--; - } - } - -} - -static void -sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - uint32_t mask; - int32_t w; - int dst_stride, src_stride; - - __m128i xmm_mask; - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_lo, xmm_alpha_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); - - xmm_mask = create_mask_16_128 (mask >> 24); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint32_t s = *src++; - - if (s) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (s); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i dest = xmm_mask; - __m128i alpha_dst = unpack_32_1x128 (d); - - *dst = pack_1x128_32 ( - in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); - } - dst++; - w--; - } - - while (w >= 4) - { - xmm_src = load_128_unaligned ((__m128i*)src); - - if (!is_zero (xmm_src)) - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask, &xmm_mask, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - uint32_t s = *src++; - - if (s) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (s); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i mask = xmm_mask; - __m128i dest = unpack_32_1x128 (d); - - *dst = pack_1x128_32 ( - in_over_1x128 (&ms, &alpha, &mask, &dest)); - } - - dst++; - w--; - } - } - -} - -static void -sse2_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - w--; - } - - while (w >= 8) - { - __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0); - __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1); - - save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1)); - - w -= 8; - src += 8; - dst += 8; - } - - while (w) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - w--; - } - } -} - -static void -sse2_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int32_t w; - int dst_stride, src_stride; - - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - *dst++ = *src++ | 0xff000000; - w--; - } - - while (w >= 16) - { - __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4; - - xmm_src1 = load_128_unaligned ((__m128i*)src + 0); - xmm_src2 = load_128_unaligned ((__m128i*)src + 1); - xmm_src3 = load_128_unaligned ((__m128i*)src + 2); - xmm_src4 = load_128_unaligned ((__m128i*)src + 3); - - save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); - save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000)); - save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000)); - save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000)); - - dst += 16; - src += 16; - w -= 16; - } - - while (w) - { - *dst++ = *src++ | 0xff000000; - w--; - } - } - -} - -static void -sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - uint32_t mask; - int dst_stride, src_stride; - int32_t w; - - __m128i xmm_mask, xmm_alpha; - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); - - xmm_mask = create_mask_16_128 (mask >> 24); - xmm_alpha = mask_00ff; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint32_t s = (*src++) | 0xff000000; - uint32_t d = *dst; - - __m128i src = unpack_32_1x128 (s); - __m128i alpha = xmm_alpha; - __m128i mask = xmm_mask; - __m128i dest = unpack_32_1x128 (d); - - *dst++ = pack_1x128_32 ( - in_over_1x128 (&src, &alpha, &mask, &dest)); - - w--; - } - - while (w >= 4) - { - xmm_src = _mm_or_si128 ( - load_128_unaligned ((__m128i*)src), mask_ff000000); - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha, &xmm_alpha, - &xmm_mask, &xmm_mask, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - dst += 4; - src += 4; - w -= 4; - - } - - while (w) - { - uint32_t s = (*src++) | 0xff000000; - uint32_t d = *dst; - - __m128i src = unpack_32_1x128 (s); - __m128i alpha = xmm_alpha; - __m128i mask = xmm_mask; - __m128i dest = unpack_32_1x128 (d); - - *dst++ = pack_1x128_32 ( - in_over_1x128 (&src, &alpha, &mask, &dest)); - - w--; - } - } - -} - -static void -sse2_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - int dst_stride, src_stride; - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - dst = dst_line; - src = src_line; - - while (height--) - { - sse2_combine_over_u (imp, op, dst, src, NULL, width); - - dst += dst_stride; - src += src_stride; - } -} - -static force_inline uint16_t -composite_over_8888_0565pixel (uint32_t src, uint16_t dst) -{ - __m128i ms; - - ms = unpack_32_1x128 (src); - return pack_565_32_16 ( - pack_1x128_32 ( - over_1x128 ( - ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst)))); -} - -static void -sse2_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst, d; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - __m128i xmm_alpha_lo, xmm_alpha_hi; - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - src = src_line; - - dst_line += dst_stride; - src_line += src_stride; - w = width; - - /* Align dst on a 16-byte boundary */ - while (w && - ((uintptr_t)dst & 15)) - { - s = *src++; - d = *dst; - - *dst++ = composite_over_8888_0565pixel (s, d); - w--; - } - - /* It's a 8 pixel loop */ - while (w >= 8) - { - /* I'm loading unaligned because I'm not sure - * about the address alignment. - */ - xmm_src = load_128_unaligned ((__m128i*) src); - xmm_dst = load_128_aligned ((__m128i*) dst); - - /* Unpacking */ - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_565_128_4x128 (xmm_dst, - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - /* I'm loading next 4 pixels from memory - * before to optimze the memory read. - */ - xmm_src = load_128_unaligned ((__m128i*) (src + 4)); - - over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst0, &xmm_dst1); - - /* Unpacking */ - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst2, &xmm_dst3); - - save_128_aligned ( - (__m128i*)dst, pack_565_4x128_128 ( - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); - - w -= 8; - dst += 8; - src += 8; - } - - while (w--) - { - s = *src++; - d = *dst; - - *dst++ = composite_over_8888_0565pixel (s, d); - } - } - -} - -static void -sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t d; - - __m128i xmm_src, xmm_alpha, xmm_def; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - xmm_def = create_mask_2x32_128 (src, src); - xmm_src = expand_pixel_32_1x128 (src); - xmm_alpha = expand_alpha_1x128 (xmm_src); - mmx_src = xmm_src; - mmx_alpha = xmm_alpha; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmx_mask = expand_pixel_8_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, - &mmx_alpha, - &mmx_mask, - &mmx_dest)); - } - - w--; - dst++; - } - - while (w >= 4) - { - uint32_t m; - memcpy(&m, mask, sizeof(uint32_t)); - - if (srca == 0xff && m == 0xffffffff) - { - save_128_aligned ((__m128i*)dst, xmm_def); - } - else if (m) - { - xmm_dst = load_128_aligned ((__m128i*) dst); - xmm_mask = unpack_32_1x128 (m); - xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); - - /* Unpacking */ - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmx_mask = expand_pixel_8_1x128 (m); - mmx_dest = unpack_32_1x128 (d); - - *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, - &mmx_alpha, - &mmx_mask, - &mmx_dest)); - } - - w--; - dst++; - } - } - -} - -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) -__attribute__((__force_align_arg_pointer__)) -#endif -static pixman_bool_t -sse2_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - uint32_t byte_width; - uint8_t *byte_line; - - __m128i xmm_def; - - if (bpp == 8) - { - uint32_t b; - uint32_t w; - - stride = stride * (int) sizeof (uint32_t) / 1; - byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); - byte_width = width; - stride *= 1; - - b = filler & 0xff; - w = (b << 8) | b; - filler = (w << 16) | w; - } - else if (bpp == 16) - { - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); - byte_width = 2 * width; - stride *= 2; - - filler = (filler & 0xffff) * 0x00010001; - } - else if (bpp == 32) - { - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); - byte_width = 4 * width; - stride *= 4; - } - else - { - return FALSE; - } - - xmm_def = create_mask_2x32_128 (filler, filler); - - while (height--) - { - int w; - uint8_t *d = byte_line; - byte_line += stride; - w = byte_width; - - if (w >= 1 && ((uintptr_t)d & 1)) - { - *(uint8_t *)d = filler; - w -= 1; - d += 1; - } - - while (w >= 2 && ((uintptr_t)d & 3)) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - - while (w >= 4 && ((uintptr_t)d & 15)) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - - while (w >= 128) - { - save_128_aligned ((__m128i*)(d), xmm_def); - save_128_aligned ((__m128i*)(d + 16), xmm_def); - save_128_aligned ((__m128i*)(d + 32), xmm_def); - save_128_aligned ((__m128i*)(d + 48), xmm_def); - save_128_aligned ((__m128i*)(d + 64), xmm_def); - save_128_aligned ((__m128i*)(d + 80), xmm_def); - save_128_aligned ((__m128i*)(d + 96), xmm_def); - save_128_aligned ((__m128i*)(d + 112), xmm_def); - - d += 128; - w -= 128; - } - - if (w >= 64) - { - save_128_aligned ((__m128i*)(d), xmm_def); - save_128_aligned ((__m128i*)(d + 16), xmm_def); - save_128_aligned ((__m128i*)(d + 32), xmm_def); - save_128_aligned ((__m128i*)(d + 48), xmm_def); - - d += 64; - w -= 64; - } - - if (w >= 32) - { - save_128_aligned ((__m128i*)(d), xmm_def); - save_128_aligned ((__m128i*)(d + 16), xmm_def); - - d += 32; - w -= 32; - } - - if (w >= 16) - { - save_128_aligned ((__m128i*)(d), xmm_def); - - d += 16; - w -= 16; - } - - while (w >= 4) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - - if (w >= 2) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - - if (w >= 1) - { - *(uint8_t *)d = filler; - w -= 1; - d += 1; - } - } - - return TRUE; -} - -static void -sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - - __m128i xmm_src, xmm_def; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - if (src == 0) - { - sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dest_image->bits.format), - dest_x, dest_y, width, height, 0); - return; - } - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - xmm_def = create_mask_2x32_128 (src, src); - xmm_src = expand_pixel_32_1x128 (src); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint8_t m = *mask++; - - if (m) - { - *dst = pack_1x128_32 ( - pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m))); - } - else - { - *dst = 0; - } - - w--; - dst++; - } - - while (w >= 4) - { - uint32_t m; - memcpy(&m, mask, sizeof(uint32_t)); - - if (srca == 0xff && m == 0xffffffff) - { - save_128_aligned ((__m128i*)dst, xmm_def); - } - else if (m) - { - xmm_mask = unpack_32_1x128 (m); - xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); - - /* Unpacking */ - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_src, &xmm_src, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); - } - else - { - save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - *dst = pack_1x128_32 ( - pix_multiply_1x128 ( - xmm_src, expand_pixel_8_1x128 (m))); - } - else - { - *dst = 0; - } - - w--; - dst++; - } - } - -} - -static void -sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint16_t *dst_line, *dst, d; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; - - __m128i xmm_src, xmm_alpha; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - xmm_src = expand_pixel_32_1x128 (src); - xmm_alpha = expand_alpha_1x128 (xmm_src); - mmx_src = xmm_src; - mmx_alpha = xmm_alpha; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); - mmx_dest = expand565_16_1x128 (d); - - *dst = pack_565_32_16 ( - pack_1x128_32 ( - in_over_1x128 ( - &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); - } - - w--; - dst++; - } - - while (w >= 8) - { - uint32_t m; - - xmm_dst = load_128_aligned ((__m128i*) dst); - unpack_565_128_4x128 (xmm_dst, - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - - memcpy(&m, mask, sizeof(uint32_t)); - mask += 4; - - if (m) - { - xmm_mask = unpack_32_1x128 (m); - xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); - - /* Unpacking */ - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst0, &xmm_dst1); - } - - memcpy(&m, mask, sizeof(uint32_t)); - mask += 4; - - if (m) - { - xmm_mask = unpack_32_1x128 (m); - xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); - - /* Unpacking */ - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst2, &xmm_dst3); - } - - save_128_aligned ( - (__m128i*)dst, pack_565_4x128_128 ( - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); - - w -= 8; - dst += 8; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); - mmx_dest = expand565_16_1x128 (d); - - *dst = pack_565_32_16 ( - pack_1x128_32 ( - in_over_1x128 ( - &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); - } - - w--; - dst++; - } - } - -} - -static void -sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst, d; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - uint32_t opaque, zero; - - __m128i ms; - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - s = *src++; - d = *dst; - - ms = unpack_32_1x128 (s); - - *dst++ = pack_565_32_16 ( - pack_1x128_32 ( - over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); - w--; - } - - while (w >= 8) - { - /* First round */ - xmm_src = load_128_unaligned ((__m128i*)src); - xmm_dst = load_128_aligned ((__m128i*)dst); - - opaque = is_opaque (xmm_src); - zero = is_zero (xmm_src); - - unpack_565_128_4x128 (xmm_dst, - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - - /* preload next round*/ - xmm_src = load_128_unaligned ((__m128i*)(src + 4)); - - if (opaque) - { - invert_colors_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst0, &xmm_dst1); - } - else if (!zero) - { - over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst0, &xmm_dst1); - } - - /* Second round */ - opaque = is_opaque (xmm_src); - zero = is_zero (xmm_src); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - - if (opaque) - { - invert_colors_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst2, &xmm_dst3); - } - else if (!zero) - { - over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst2, &xmm_dst3); - } - - save_128_aligned ( - (__m128i*)dst, pack_565_4x128_128 ( - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); - - w -= 8; - src += 8; - dst += 8; - } - - while (w) - { - s = *src++; - d = *dst; - - ms = unpack_32_1x128 (s); - - *dst++ = pack_565_32_16 ( - pack_1x128_32 ( - over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); - w--; - } - } - -} - -static void -sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst, d; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - uint32_t opaque, zero; - - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - s = *src++; - d = *dst; - - *dst++ = pack_1x128_32 ( - over_rev_non_pre_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (d))); - - w--; - } - - while (w >= 4) - { - xmm_src_hi = load_128_unaligned ((__m128i*)src); - - opaque = is_opaque (xmm_src_hi); - zero = is_zero (xmm_src_hi); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - - if (opaque) - { - invert_colors_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - else if (!zero) - { - xmm_dst_hi = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - w -= 4; - dst += 4; - src += 4; - } - - while (w) - { - s = *src++; - d = *dst; - - *dst++ = pack_1x128_32 ( - over_rev_non_pre_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (d))); - - w--; - } - } - -} - -static void -sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint16_t *dst_line, *dst, d; - uint32_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int w; - uint32_t pack_cmp; - - __m128i xmm_src, xmm_alpha; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - - __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - xmm_src = expand_pixel_32_1x128 (src); - xmm_alpha = expand_alpha_1x128 (xmm_src); - mmx_src = xmm_src; - mmx_alpha = xmm_alpha; - - while (height--) - { - w = width; - mask = mask_line; - dst = dst_line; - mask_line += mask_stride; - dst_line += dst_stride; - - while (w && ((uintptr_t)dst & 15)) - { - m = *(uint32_t *) mask; - - if (m) - { - d = *dst; - mmx_mask = unpack_32_1x128 (m); - mmx_dest = expand565_16_1x128 (d); - - *dst = pack_565_32_16 ( - pack_1x128_32 ( - in_over_1x128 ( - &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); - } - - w--; - dst++; - mask++; - } - - while (w >= 8) - { - /* First round */ - xmm_mask = load_128_unaligned ((__m128i*)mask); - xmm_dst = load_128_aligned ((__m128i*)dst); - - pack_cmp = _mm_movemask_epi8 ( - _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); - - unpack_565_128_4x128 (xmm_dst, - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - /* preload next round */ - xmm_mask = load_128_unaligned ((__m128i*)(mask + 4)); - - /* preload next round */ - if (pack_cmp != 0xffff) - { - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst0, &xmm_dst1); - } - - /* Second round */ - pack_cmp = _mm_movemask_epi8 ( - _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - - if (pack_cmp != 0xffff) - { - in_over_2x128 (&xmm_src, &xmm_src, - &xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst2, &xmm_dst3); - } - - save_128_aligned ( - (__m128i*)dst, pack_565_4x128_128 ( - &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); - - w -= 8; - dst += 8; - mask += 8; - } - - while (w) - { - m = *(uint32_t *) mask; - - if (m) - { - d = *dst; - mmx_mask = unpack_32_1x128 (m); - mmx_dest = expand565_16_1x128 (d); - - *dst = pack_565_32_16 ( - pack_1x128_32 ( - in_over_1x128 ( - &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); - } - - w--; - dst++; - mask++; - } - } - -} - -static void -sse2_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - uint32_t d; - uint32_t src; - int32_t w; - - __m128i xmm_alpha; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - uint8_t m = *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 (xmm_alpha, - unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - w--; - } - - while (w >= 16) - { - xmm_mask = load_128_unaligned ((__m128i*)mask); - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - mask += 16; - dst += 16; - w -= 16; - } - - while (w) - { - uint8_t m = *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 ( - pix_multiply_1x128 ( - xmm_alpha, unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - w--; - } - } - -} - -static void -sse2_composite_in_n_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - int dst_stride; - uint32_t d; - uint32_t src; - int32_t w; - - __m128i xmm_alpha; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); - - src = src >> 24; - - if (src == 0xff) - return; - - if (src == 0x00) - { - pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, - 8, dest_x, dest_y, width, height, src); - - return; - } - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 ( - xmm_alpha, - unpack_32_1x128 (d))); - w--; - } - - while (w >= 16) - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, - &xmm_dst_lo, &xmm_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - dst += 16; - w -= 16; - } - - while (w) - { - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 ( - xmm_alpha, - unpack_32_1x128 (d))); - w--; - } - } - -} - -static void -sse2_composite_in_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int src_stride, dst_stride; - int32_t w; - uint32_t s, d; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - s = (uint32_t) *src++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 ( - unpack_32_1x128 (s), unpack_32_1x128 (d))); - w--; - } - - while (w >= 16) - { - xmm_src = load_128_unaligned ((__m128i*)src); - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_dst_lo, &xmm_dst_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - src += 16; - dst += 16; - w -= 16; - } - - while (w) - { - s = (uint32_t) *src++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d))); - w--; - } - } - -} - -static void -sse2_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - uint32_t d; - - __m128i xmm_alpha; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - uint8_t m = *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - _mm_adds_epu16 ( - pix_multiply_1x128 ( - xmm_alpha, unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - w--; - } - - while (w >= 16) - { - xmm_mask = load_128_unaligned ((__m128i*)mask); - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); - xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - - mask += 16; - dst += 16; - w -= 16; - } - - while (w) - { - uint8_t m = (uint32_t) *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x128_32 ( - _mm_adds_epu16 ( - pix_multiply_1x128 ( - xmm_alpha, unpack_32_1x128 (m)), - unpack_32_1x128 (d))); - - w--; - } - } - -} - -static void -sse2_composite_add_n_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - int dst_stride; - int32_t w; - uint32_t src; - - __m128i xmm_src; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - src >>= 24; - - if (src == 0x00) - return; - - if (src == 0xff) - { - pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, - 8, dest_x, dest_y, width, height, 0xff); - - return; - } - - src = (src << 24) | (src << 16) | (src << 8) | src; - xmm_src = _mm_set_epi32 (src, src, src, src); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - *dst = (uint8_t)_mm_cvtsi128_si32 ( - _mm_adds_epu8 ( - xmm_src, - _mm_cvtsi32_si128 (*dst))); - - w--; - dst++; - } - - while (w >= 16) - { - save_128_aligned ( - (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); - - dst += 16; - w -= 16; - } - - while (w) - { - *dst = (uint8_t)_mm_cvtsi128_si32 ( - _mm_adds_epu8 ( - xmm_src, - _mm_cvtsi32_si128 (*dst))); - - w--; - dst++; - } - } - -} - -static void -sse2_composite_add_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - src = src_line; - - dst_line += dst_stride; - src_line += src_stride; - w = width; - - /* Small head */ - while (w && (uintptr_t)dst & 3) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } - - sse2_combine_add_u (imp, op, - (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); - - /* Small tail */ - dst += w & 0xfffc; - src += w & 0xfffc; - - w &= 3; - - while (w) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } - } - -} - -static void -sse2_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - - sse2_combine_add_u (imp, op, dst, src, NULL, width); - } -} - -static void -sse2_composite_add_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst, src; - int dst_stride; - - __m128i xmm_src; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - if (src == 0) - return; - - if (src == ~0) - { - pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, - dest_x, dest_y, width, height, ~0); - - return; - } - - xmm_src = _mm_set_epi32 (src, src, src, src); - while (height--) - { - int w = width; - uint32_t d; - - dst = dst_line; - dst_line += dst_stride; - - while (w && (uintptr_t)dst & 15) - { - d = *dst; - *dst++ = - _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); - w--; - } - - while (w >= 4) - { - save_128_aligned - ((__m128i*)dst, - _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); - - dst += 4; - w -= 4; - } - - while (w--) - { - d = *dst; - *dst++ = - _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, - _mm_cvtsi32_si128 (d))); - } - } -} - -static void -sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - - __m128i xmm_src; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - if (src == 0) - return; - xmm_src = expand_pixel_32_1x128 (src); - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - uint8_t m = *mask++; - if (m) - { - *dst = pack_1x128_32 - (_mm_adds_epu16 - (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), - unpack_32_1x128 (*dst))); - } - dst++; - w--; - } - - while (w >= 4) - { - uint32_t m; - memcpy(&m, mask, sizeof(uint32_t)); - - if (m) - { - __m128i xmm_mask_lo, xmm_mask_hi; - __m128i xmm_dst_lo, xmm_dst_hi; - - __m128i xmm_dst = load_128_aligned ((__m128i*)dst); - __m128i xmm_mask = - _mm_unpacklo_epi8 (unpack_32_1x128(m), - _mm_setzero_si128 ()); - - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - pix_multiply_2x128 (&xmm_src, &xmm_src, - &xmm_mask_lo, &xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); - - xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); - xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint8_t m = *mask++; - if (m) - { - *dst = pack_1x128_32 - (_mm_adds_epu16 - (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), - unpack_32_1x128 (*dst))); - } - dst++; - w--; - } - } -} - -static pixman_bool_t -sse2_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - uint8_t * src_bytes; - uint8_t * dst_bytes; - int byte_width; - - if (src_bpp != dst_bpp) - return FALSE; - - if (src_bpp == 16) - { - src_stride = src_stride * (int) sizeof (uint32_t) / 2; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; - src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); - byte_width = 2 * width; - src_stride *= 2; - dst_stride *= 2; - } - else if (src_bpp == 32) - { - src_stride = src_stride * (int) sizeof (uint32_t) / 4; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); - byte_width = 4 * width; - src_stride *= 4; - dst_stride *= 4; - } - else - { - return FALSE; - } - - while (height--) - { - int w; - uint8_t *s = src_bytes; - uint8_t *d = dst_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - w = byte_width; - - while (w >= 2 && ((uintptr_t)d & 3)) - { - memmove(d, s, 2); - w -= 2; - s += 2; - d += 2; - } - - while (w >= 4 && ((uintptr_t)d & 15)) - { - memmove(d, s, 4); - - w -= 4; - s += 4; - d += 4; - } - - while (w >= 64) - { - __m128i xmm0, xmm1, xmm2, xmm3; - - xmm0 = load_128_unaligned ((__m128i*)(s)); - xmm1 = load_128_unaligned ((__m128i*)(s + 16)); - xmm2 = load_128_unaligned ((__m128i*)(s + 32)); - xmm3 = load_128_unaligned ((__m128i*)(s + 48)); - - save_128_aligned ((__m128i*)(d), xmm0); - save_128_aligned ((__m128i*)(d + 16), xmm1); - save_128_aligned ((__m128i*)(d + 32), xmm2); - save_128_aligned ((__m128i*)(d + 48), xmm3); - - s += 64; - d += 64; - w -= 64; - } - - while (w >= 16) - { - save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); - - w -= 16; - d += 16; - s += 16; - } - - while (w >= 4) - { - memmove(d, s, 4); - - w -= 4; - s += 4; - d += 4; - } - - if (w >= 2) - { - memmove(d, s, 2); - w -= 2; - s += 2; - d += 2; - } - } - - return TRUE; -} - -static void -sse2_composite_copy_area (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - sse2_blt (imp, src_image->bits.bits, - dest_image->bits.bits, - src_image->bits.rowstride, - dest_image->bits.rowstride, - PIXMAN_FORMAT_BPP (src_image->bits.format), - PIXMAN_FORMAT_BPP (dest_image->bits.format), - src_x, src_y, dest_x, dest_y, width, height); -} - -static void -sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *src, *src_line, s; - uint32_t *dst, *dst_line, d; - uint8_t *mask, *mask_line; - int src_stride, mask_stride, dst_stride; - int32_t w; - __m128i ms; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint8_t m = *mask++; - s = 0xff000000 | *src++; - d = *dst; - ms = unpack_32_1x128 (s); - - if (m != 0xff) - { - __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); - __m128i md = unpack_32_1x128 (d); - - ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md); - } - - *dst++ = pack_1x128_32 (ms); - w--; - } - - while (w >= 4) - { - uint32_t m; - memcpy(&m, mask, sizeof(uint32_t)); - xmm_src = _mm_or_si128 ( - load_128_unaligned ((__m128i*)src), mask_ff000000); - - if (m == 0xffffffff) - { - save_128_aligned ((__m128i*)dst, xmm_src); - } - else - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_rev_2x128 ( - xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - src += 4; - dst += 4; - mask += 4; - w -= 4; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - s = 0xff000000 | *src; - - if (m == 0xff) - { - *dst = s; - } - else - { - __m128i ma, md, ms; - - d = *dst; - - ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); - md = unpack_32_1x128 (d); - ms = unpack_32_1x128 (s); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md)); - } - - } - - src++; - dst++; - w--; - } - } - -} - -static void -sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *src, *src_line, s; - uint32_t *dst, *dst_line, d; - uint8_t *mask, *mask_line; - int src_stride, mask_stride, dst_stride; - int32_t w; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint32_t sa; - uint8_t m = *mask++; - - s = *src++; - d = *dst; - - sa = s >> 24; - - if (m) - { - if (sa == 0xff && m == 0xff) - { - *dst = s; - } - else - { - __m128i ms, md, ma, msa; - - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (s); - md = unpack_32_1x128 (d); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - - dst++; - w--; - } - - while (w >= 4) - { - uint32_t m; - memcpy(&m, mask, sizeof(uint32_t)); - - if (m) - { - xmm_src = load_128_unaligned ((__m128i*)src); - - if (m == 0xffffffff && is_opaque (xmm_src)) - { - save_128_aligned ((__m128i *)dst, xmm_src); - } - else - { - xmm_dst = load_128_aligned ((__m128i *)dst); - - xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, - &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - } - - src += 4; - dst += 4; - mask += 4; - w -= 4; - } - - while (w) - { - uint32_t sa; - uint8_t m = *mask++; - - s = *src++; - d = *dst; - - sa = s >> 24; - - if (m) - { - if (sa == 0xff && m == 0xff) - { - *dst = s; - } - else - { - __m128i ms, md, ma, msa; - - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (s); - md = unpack_32_1x128 (d); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - - dst++; - w--; - } - } - -} - -static void -sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src; - uint32_t *dst_line, *dst; - __m128i xmm_src; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_dsta_hi, xmm_dsta_lo; - int dst_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - xmm_src = expand_pixel_32_1x128 (src); - - while (height--) - { - dst = dst_line; - - dst_line += dst_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - __m128i vd; - - vd = unpack_32_1x128 (*dst); - - *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), - xmm_src)); - w--; - dst++; - } - - while (w >= 4) - { - __m128i tmp_lo, tmp_hi; - - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi); - - tmp_lo = xmm_src; - tmp_hi = xmm_src; - - over_2x128 (&xmm_dst_lo, &xmm_dst_hi, - &xmm_dsta_lo, &xmm_dsta_hi, - &tmp_lo, &tmp_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi)); - - w -= 4; - dst += 4; - } - - while (w) - { - __m128i vd; - - vd = unpack_32_1x128 (*dst); - - *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), - xmm_src)); - w--; - dst++; - } - - } - -} - -static void -sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *src, *src_line, s; - uint32_t *dst, *dst_line, d; - uint32_t *mask, *mask_line; - uint32_t m; - int src_stride, mask_stride, dst_stride; - int32_t w; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - - while (w && (uintptr_t)dst & 15) - { - uint32_t sa; - - s = *src++; - m = (*mask++) >> 24; - d = *dst; - - sa = s >> 24; - - if (m) - { - if (sa == 0xff && m == 0xff) - { - *dst = s; - } - else - { - __m128i ms, md, ma, msa; - - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (s); - md = unpack_32_1x128 (d); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - - dst++; - w--; - } - - while (w >= 4) - { - xmm_mask = load_128_unaligned ((__m128i*)mask); - - if (!is_transparent (xmm_mask)) - { - xmm_src = load_128_unaligned ((__m128i*)src); - - if (is_opaque (xmm_mask) && is_opaque (xmm_src)) - { - save_128_aligned ((__m128i *)dst, xmm_src); - } - else - { - xmm_dst = load_128_aligned ((__m128i *)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); - expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, - &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - } - - src += 4; - dst += 4; - mask += 4; - w -= 4; - } - - while (w) - { - uint32_t sa; - - s = *src++; - m = (*mask++) >> 24; - d = *dst; - - sa = s >> 24; - - if (m) - { - if (sa == 0xff && m == 0xff) - { - *dst = s; - } - else - { - __m128i ms, md, ma, msa; - - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (s); - md = unpack_32_1x128 (d); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - - dst++; - w--; - } - } - -} - -/* A variant of 'sse2_combine_over_u' with minor tweaks */ -static force_inline void -scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, - const uint32_t* ps, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t src_width_fixed, - pixman_bool_t fully_transparent_src) -{ - uint32_t s, d; - const uint32_t* pm = NULL; - - __m128i xmm_dst_lo, xmm_dst_hi; - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_alpha_lo, xmm_alpha_hi; - - if (fully_transparent_src) - return; - - /* Align dst on a 16-byte boundary */ - while (w && ((uintptr_t)pd & 15)) - { - d = *pd; - s = combine1 (ps + pixman_fixed_to_int (vx), pm); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - *pd++ = core_combine_over_u_pixel_sse2 (s, d); - if (pm) - pm++; - w--; - } - - while (w >= 4) - { - __m128i tmp; - uint32_t tmp1, tmp2, tmp3, tmp4; - - tmp1 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp2 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp3 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp4 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); - - xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm); - - if (is_opaque (xmm_src_hi)) - { - save_128_aligned ((__m128i*)pd, xmm_src_hi); - } - else if (!is_zero (xmm_src_hi)) - { - xmm_dst_hi = load_128_aligned ((__m128i*) pd); - - unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 ( - xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); - - over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst_lo, &xmm_dst_hi); - - /* rebuid the 4 pixel data and save*/ - save_128_aligned ((__m128i*)pd, - pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - w -= 4; - pd += 4; - if (pm) - pm += 4; - } - - while (w) - { - d = *pd; - s = combine1 (ps + pixman_fixed_to_int (vx), pm); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - *pd++ = core_combine_over_u_pixel_sse2 (s, d); - if (pm) - pm++; - - w--; - } -} - -FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, COVER) -FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, NONE) -FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, PAD) -FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, NORMAL) - -static force_inline void -scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, - uint32_t * dst, - const uint32_t * src, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t src_width_fixed, - pixman_bool_t zero_src) -{ - __m128i xmm_mask; - __m128i xmm_src, xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_lo, xmm_alpha_hi; - - if (zero_src || (*mask >> 24) == 0) - return; - - xmm_mask = create_mask_16_128 (*mask >> 24); - - while (w && (uintptr_t)dst & 15) - { - uint32_t s = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - if (s) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (s); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i dest = xmm_mask; - __m128i alpha_dst = unpack_32_1x128 (d); - - *dst = pack_1x128_32 ( - in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); - } - dst++; - w--; - } - - while (w >= 4) - { - uint32_t tmp1, tmp2, tmp3, tmp4; - - tmp1 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp2 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp3 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp4 = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); - - if (!is_zero (xmm_src)) - { - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask, &xmm_mask, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - dst += 4; - w -= 4; - } - - while (w) - { - uint32_t s = *(src + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - if (s) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (s); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i mask = xmm_mask; - __m128i dest = unpack_32_1x128 (d); - - *dst = pack_1x128_32 ( - in_over_1x128 (&ms, &alpha, &mask, &dest)); - } - - dst++; - w--; - } - -} - -FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) -FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) - -#if PSHUFD_IS_FAST - -/***********************************************************************************/ - -# define BILINEAR_DECLARE_VARIABLES \ - const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ - const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ - const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ - unit_x, -unit_x, unit_x, -unit_x); \ - const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4); \ - const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \ - vx + unit_x * 2, -(vx + 1) - unit_x * 2, \ - vx + unit_x * 1, -(vx + 1) - unit_x * 1, \ - vx + unit_x * 0, -(vx + 1) - unit_x * 0); \ - __m128i xmm_wh_state; - -#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \ -do { \ - int phase = phase_; \ - __m128i xmm_wh, xmm_a, xmm_b; \ - /* fetch 2x2 pixel block into sse2 registers */ \ - __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ - __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ - vx += unit_x; \ - /* vertical interpolation */ \ - xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ - xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ - xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ - /* calculate horizontal weights */ \ - if (phase <= 0) \ - { \ - xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS)); \ - xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \ - phase = 0; \ - } \ - xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \ - phase, phase)); \ - /* horizontal interpolation */ \ - xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ - xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \ - /* shift the result */ \ - pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ -} while (0) - -#else /************************************************************************/ - -# define BILINEAR_DECLARE_VARIABLES \ - const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ - const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ - const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ - unit_x, -unit_x, unit_x, -unit_x); \ - const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4, \ - unit_x * 4, -unit_x * 4); \ - const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ - vx, -(vx + 1), vx, -(vx + 1)) - -#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \ -do { \ - __m128i xmm_wh, xmm_a, xmm_b; \ - /* fetch 2x2 pixel block into sse2 registers */ \ - __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ - __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ - (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \ - vx += unit_x; \ - /* vertical interpolation */ \ - xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ - xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ - xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS)); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ - /* horizontal interpolation */ \ - xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \ - xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \ - /* shift the result */ \ - pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ -} while (0) - -/***********************************************************************************/ - -#endif - -#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); \ -do { \ - __m128i xmm_pix; \ - BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \ - xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \ - xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \ - pix = _mm_cvtsi128_si32 (xmm_pix); \ -} while(0) - -#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); \ -do { \ - __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \ - BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \ - BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \ - BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \ - BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \ - xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \ - xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \ - pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \ -} while(0) - -#define BILINEAR_SKIP_ONE_PIXEL() \ -do { \ - vx += unit_x; \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ -} while(0) - -#define BILINEAR_SKIP_FOUR_PIXELS() \ -do { \ - vx += unit_x * 4; \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \ -} while(0) - -/***********************************************************************************/ - -static force_inline void -scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx_, - pixman_fixed_t unit_x_, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - intptr_t vx = vx_; - intptr_t unit_x = unit_x_; - BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2; - - while (w && ((uintptr_t)dst & 15)) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - *dst++ = pix1; - w--; - } - - while ((w -= 4) >= 0) { - __m128i xmm_src; - BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - _mm_store_si128 ((__m128i *)dst, xmm_src); - dst += 4; - } - - if (w & 2) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - *dst++ = pix1; - *dst++ = pix2; - } - - if (w & 1) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - *dst = pix1; - } - -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - -static force_inline void -scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx_, - pixman_fixed_t unit_x_, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - intptr_t vx = vx_; - intptr_t unit_x = unit_x_; - BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2; - - while (w && ((uintptr_t)dst & 15)) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - *dst++ = pix1 | 0xFF000000; - w--; - } - - while ((w -= 4) >= 0) { - __m128i xmm_src; - BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); - dst += 4; - } - - if (w & 2) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - *dst++ = pix1 | 0xFF000000; - *dst++ = pix2 | 0xFF000000; - } - - if (w & 1) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - *dst = pix1 | 0xFF000000; - } -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - -static force_inline void -scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx_, - pixman_fixed_t unit_x_, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - intptr_t vx = vx_; - intptr_t unit_x = unit_x_; - BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2; - - while (w && ((uintptr_t)dst & 15)) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - - if (pix1) - { - pix2 = *dst; - *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); - } - - w--; - dst++; - } - - while (w >= 4) - { - __m128i xmm_src; - __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; - __m128i xmm_alpha_hi, xmm_alpha_lo; - - BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - - if (!is_zero (xmm_src)) - { - if (is_opaque (xmm_src)) - { - save_128_aligned ((__m128i *)dst, xmm_src); - } - else - { - __m128i xmm_dst = load_128_aligned ((__m128i *)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); - over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - } - - w -= 4; - dst += 4; - } - - while (w) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - - if (pix1) - { - pix2 = *dst; - *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); - } - - w--; - dst++; - } -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - -static force_inline void -scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, - const uint8_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx_, - pixman_fixed_t unit_x_, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - intptr_t vx = vx_; - intptr_t unit_x = unit_x_; - BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2; - - while (w && ((uintptr_t)dst & 15)) - { - uint32_t sa; - uint8_t m = *mask++; - - if (m) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - sa = pix1 >> 24; - - if (sa == 0xff && m == 0xff) - { - *dst = pix1; - } - else - { - __m128i ms, md, ma, msa; - - pix2 = *dst; - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (pix1); - md = unpack_32_1x128 (pix2); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - else - { - BILINEAR_SKIP_ONE_PIXEL (); - } - - w--; - dst++; - } - - while (w >= 4) - { - uint32_t m; - - __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - - memcpy(&m, mask, sizeof(uint32_t)); - - if (m) - { - BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - - if (m == 0xffffffff && is_opaque (xmm_src)) - { - save_128_aligned ((__m128i *)dst, xmm_src); - } - else - { - xmm_dst = load_128_aligned ((__m128i *)dst); - - xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, - &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - } - else - { - BILINEAR_SKIP_FOUR_PIXELS (); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint32_t sa; - uint8_t m = *mask++; - - if (m) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - sa = pix1 >> 24; - - if (sa == 0xff && m == 0xff) - { - *dst = pix1; - } - else - { - __m128i ms, md, ma, msa; - - pix2 = *dst; - ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); - ms = unpack_32_1x128 (pix1); - md = unpack_32_1x128 (pix2); - - msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); - - *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); - } - } - else - { - BILINEAR_SKIP_ONE_PIXEL (); - } - - w--; - dst++; - } -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - COVER, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - PAD, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - NONE, FLAG_HAVE_NON_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, - uint32_t, uint8_t, uint32_t, - NORMAL, FLAG_HAVE_NON_SOLID_MASK) - -static force_inline void -scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx_, - pixman_fixed_t unit_x_, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - intptr_t vx = vx_; - intptr_t unit_x = unit_x_; - BILINEAR_DECLARE_VARIABLES; - uint32_t pix1; - __m128i xmm_mask; - - if (zero_src || (*mask >> 24) == 0) - return; - - xmm_mask = create_mask_16_128 (*mask >> 24); - - while (w && ((uintptr_t)dst & 15)) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - if (pix1) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (pix1); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i dest = xmm_mask; - __m128i alpha_dst = unpack_32_1x128 (d); - - *dst = pack_1x128_32 - (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); - } - - dst++; - w--; - } - - while (w >= 4) - { - __m128i xmm_src; - BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - - if (!is_zero (xmm_src)) - { - __m128i xmm_src_lo, xmm_src_hi; - __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - __m128i xmm_alpha_lo, xmm_alpha_hi; - - xmm_dst = load_128_aligned ((__m128i*)dst); - - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi); - - in_over_2x128 (&xmm_src_lo, &xmm_src_hi, - &xmm_alpha_lo, &xmm_alpha_hi, - &xmm_mask, &xmm_mask, - &xmm_dst_lo, &xmm_dst_hi); - - save_128_aligned - ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - } - - dst += 4; - w -= 4; - } - - while (w) - { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - if (pix1) - { - uint32_t d = *dst; - - __m128i ms = unpack_32_1x128 (pix1); - __m128i alpha = expand_alpha_1x128 (ms); - __m128i dest = xmm_mask; - __m128i alpha_dst = unpack_32_1x128 (d); - - *dst = pack_1x128_32 - (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); - } - - dst++; - w--; - } -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_HAVE_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_HAVE_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_HAVE_SOLID_MASK) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_HAVE_SOLID_MASK) - -static const pixman_fast_path_t sse2_fast_paths[] = -{ - /* PIXMAN_OP_OVER */ - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565), - PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), - - /* PIXMAN_OP_OVER_REVERSE */ - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888), - - /* PIXMAN_OP_ADD */ - PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), - PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), - - /* PIXMAN_OP_SRC */ - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area), - - /* PIXMAN_OP_IN */ - PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8), - PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8), - PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), - SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), - SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), - SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), - SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), - SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), - - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), - - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), - SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), - - { PIXMAN_OP_NONE }, -}; - -static uint32_t * -sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - __m128i ff000000 = mask_ff000000; - uint32_t *dst = iter->buffer; - uint32_t *src = (uint32_t *)iter->bits; - - iter->bits += iter->stride; - - while (w && ((uintptr_t)dst) & 0x0f) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - while (w >= 4) - { - save_128_aligned ( - (__m128i *)dst, _mm_or_si128 ( - load_128_unaligned ((__m128i *)src), ff000000)); - - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - return iter->buffer; -} - -static uint32_t * -sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint16_t *src = (uint16_t *)iter->bits; - __m128i ff000000 = mask_ff000000; - - iter->bits += iter->stride; - - while (w && ((uintptr_t)dst) & 0x0f) - { - uint16_t s = *src++; - - *dst++ = convert_0565_to_8888 (s); - w--; - } - - while (w >= 8) - { - __m128i lo, hi, s; - - s = _mm_loadu_si128 ((__m128i *)src); - - lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); - hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); - - save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); - save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); - - dst += 8; - src += 8; - w -= 8; - } - - while (w) - { - uint16_t s = *src++; - - *dst++ = convert_0565_to_8888 (s); - w--; - } - - return iter->buffer; -} - -static uint32_t * -sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint8_t *src = iter->bits; - __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6; - - iter->bits += iter->stride; - - while (w && (((uintptr_t)dst) & 15)) - { - *dst++ = (uint32_t)(*(src++)) << 24; - w--; - } - - while (w >= 16) - { - xmm0 = _mm_loadu_si128((__m128i *)src); - - xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0); - xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0); - xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1); - xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1); - xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2); - xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2); - - _mm_store_si128(((__m128i *)(dst + 0)), xmm3); - _mm_store_si128(((__m128i *)(dst + 4)), xmm4); - _mm_store_si128(((__m128i *)(dst + 8)), xmm5); - _mm_store_si128(((__m128i *)(dst + 12)), xmm6); - - dst += 16; - src += 16; - w -= 16; - } - - while (w) - { - *dst++ = (uint32_t)(*(src++)) << 24; - w--; - } - - return iter->buffer; -} - -#define IMAGE_FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - -static const pixman_iter_info_t sse2_iters[] = -{ - { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL - }, - { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL - }, - { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL - }, - { PIXMAN_null }, -}; - -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) -__attribute__((__force_align_arg_pointer__)) -#endif -pixman_implementation_t * -_pixman_implementation_create_sse2 (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); - - /* SSE2 constants */ - mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); - mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); - mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); - mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); - mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); - mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); - mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); - mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); - mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); - mask_0080 = create_mask_16_128 (0x0080); - mask_00ff = create_mask_16_128 (0x00ff); - mask_0101 = create_mask_16_128 (0x0101); - mask_ffff = create_mask_16_128 (0xffff); - mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); - mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); - mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8); - mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004); - - /* Set up function pointers */ - imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; - imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; - imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; - imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; - imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; - imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; - - imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; - - imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; - imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; - imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; - - imp->blt = sse2_blt; - imp->fill = sse2_fill; - - imp->iter_info = sse2_iters; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-ssse3.c b/vendor/pixman/pixman/pixman-ssse3.c deleted file mode 100644 index 0359895af..000000000 --- a/vendor/pixman/pixman/pixman-ssse3.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright © 2013 Soren Sandmann Pedersen - * Copyright © 2013 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Soren Sandmann (soren.sandmann@gmail.com) - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include -#include -#include "pixman-private.h" -#include "pixman-inlines.h" - -typedef struct -{ - int y; - uint64_t * buffer; -} line_t; - -typedef struct -{ - line_t lines[2]; - pixman_fixed_t y; - pixman_fixed_t x; - uint64_t data[1]; -} bilinear_info_t; - -static void -ssse3_fetch_horizontal (bits_image_t *image, line_t *line, - int y, pixman_fixed_t x, pixman_fixed_t ux, int n) -{ - uint32_t *bits = image->bits + y * image->rowstride; - __m128i vx = _mm_set_epi16 ( - - (x + 1), x, - (x + 1), x, - - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); - __m128i vux = _mm_set_epi16 ( - - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, - - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); - __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); - __m128i *b = (__m128i *)line->buffer; - __m128i vrl0, vrl1; - - while ((n -= 2) >= 0) - { - __m128i vw, vr, s; - - vrl1 = _mm_loadl_epi64 ( - (__m128i *)(bits + pixman_fixed_to_int (x + ux))); - /* vrl1: R1, L1 */ - - final_pixel: - vrl0 = _mm_loadl_epi64 ( - (__m128i *)(bits + pixman_fixed_to_int (x))); - /* vrl0: R0, L0 */ - - /* The weights are based on vx which is a vector of - * - * - (x + 1), x, - (x + 1), x, - * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux - * - * so the 16 bit weights end up like this: - * - * iw0, w0, iw0, w0, iw1, w1, iw1, w1 - * - * and after shifting and packing, we get these bytes: - * - * iw0, w0, iw0, w0, iw1, w1, iw1, w1, - * iw0, w0, iw0, w0, iw1, w1, iw1, w1, - * - * which means the first and the second input pixel - * have to be interleaved like this: - * - * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, - * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 - * - * before maddubsw can be used. - */ - - vw = _mm_add_epi16 ( - vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); - /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 - */ - - vw = _mm_packus_epi16 (vw, vw); - /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, - * iw0, w0, iw0, w0, iw1, w1, iw1, w1 - */ - vx = _mm_add_epi16 (vx, vux); - - x += 2 * ux; - - vr = _mm_unpacklo_epi16 (vrl1, vrl0); - /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ - - s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); - /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ - - vr = _mm_unpackhi_epi8 (vr, s); - /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, - * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 - */ - - vr = _mm_maddubs_epi16 (vr, vw); - - /* When the weight is 0, the inverse weight is - * 128 which can't be represented in a signed byte. - * As a result maddubsw computes the following: - * - * r = l * -128 + r * 0 - * - * rather than the desired - * - * r = l * 128 + r * 0 - * - * We fix this by taking the absolute value of the - * result. - */ - vr = _mm_abs_epi16 (vr); - - /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ - _mm_store_si128 (b++, vr); - } - - if (n == -1) - { - vrl1 = _mm_setzero_si128(); - goto final_pixel; - } - - line->y = y; -} - -static uint32_t * -ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) -{ - pixman_fixed_t fx, ux; - bilinear_info_t *info = iter->data; - line_t *line0, *line1; - int y0, y1; - int32_t dist_y; - __m128i vw; - int i; - - fx = info->x; - ux = iter->image->common.transform->matrix[0][0]; - - y0 = pixman_fixed_to_int (info->y); - y1 = y0 + 1; - - line0 = &info->lines[y0 & 0x01]; - line1 = &info->lines[y1 & 0x01]; - - if (line0->y != y0) - { - ssse3_fetch_horizontal ( - &iter->image->bits, line0, y0, fx, ux, iter->width); - } - - if (line1->y != y1) - { - ssse3_fetch_horizontal ( - &iter->image->bits, line1, y1, fx, ux, iter->width); - } - - dist_y = pixman_fixed_to_bilinear_weight (info->y); - dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS); - - vw = _mm_set_epi16 ( - dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y); - - for (i = 0; i + 3 < iter->width; i += 4) - { - __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); - __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); - __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2)); - __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2)); - __m128i r0, r1, tmp, p; - - r0 = _mm_mulhi_epu16 ( - _mm_sub_epi16 (bot0, top0), vw); - tmp = _mm_cmplt_epi16 (bot0, top0); - tmp = _mm_and_si128 (tmp, vw); - r0 = _mm_sub_epi16 (r0, tmp); - r0 = _mm_add_epi16 (r0, top0); - r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); - /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ - r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); - /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ - - r1 = _mm_mulhi_epu16 ( - _mm_sub_epi16 (bot1, top1), vw); - tmp = _mm_cmplt_epi16 (bot1, top1); - tmp = _mm_and_si128 (tmp, vw); - r1 = _mm_sub_epi16 (r1, tmp); - r1 = _mm_add_epi16 (r1, top1); - r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS); - r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1)); - /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */ - - p = _mm_packus_epi16 (r0, r1); - - _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p); - } - - while (i < iter->width) - { - __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); - __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); - __m128i r0, tmp, p; - - r0 = _mm_mulhi_epu16 ( - _mm_sub_epi16 (bot0, top0), vw); - tmp = _mm_cmplt_epi16 (bot0, top0); - tmp = _mm_and_si128 (tmp, vw); - r0 = _mm_sub_epi16 (r0, tmp); - r0 = _mm_add_epi16 (r0, top0); - r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); - /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ - r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); - /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ - - p = _mm_packus_epi16 (r0, r0); - - if (iter->width - i == 1) - { - *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p); - i++; - } - else - { - _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p); - i += 2; - } - } - - info->y += iter->image->common.transform->matrix[1][1]; - - return iter->buffer; -} - -static void -ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter) -{ - free (iter->data); -} - -static void -ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) -{ - int width = iter->width; - bilinear_info_t *info; - pixman_vector_t v; - - /* Reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (iter->image->common.transform, &v)) - goto fail; - - info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64); - if (!info) - goto fail; - - info->x = v.vector[0] - pixman_fixed_1 / 2; - info->y = v.vector[1] - pixman_fixed_1 / 2; - -#define ALIGN(addr) \ - ((void *)((((uintptr_t)(addr)) + 15) & (~15))) - - /* It is safe to set the y coordinates to -1 initially - * because COVER_CLIP_BILINEAR ensures that we will only - * be asked to fetch lines in the [0, height) interval - */ - info->lines[0].y = -1; - info->lines[0].buffer = ALIGN (&(info->data[0])); - info->lines[1].y = -1; - info->lines[1].buffer = ALIGN (info->lines[0].buffer + width); - - iter->get_scanline = ssse3_fetch_bilinear_cover; - iter->fini = ssse3_bilinear_cover_iter_fini; - - iter->data = info; - return; - -fail: - /* Something went wrong, either a bad matrix or OOM; in such cases, - * we don't guarantee any particular rendering. - */ - _pixman_log_error ( - FUNC, "Allocation failure or bad matrix, skipping rendering\n"); - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->fini = NULL; -} - -static const pixman_iter_info_t ssse3_iters[] = -{ - { PIXMAN_a8r8g8b8, - (FAST_PATH_STANDARD_FLAGS | - FAST_PATH_SCALE_TRANSFORM | - FAST_PATH_BILINEAR_FILTER | - FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), - ITER_NARROW | ITER_SRC, - ssse3_bilinear_cover_iter_init, - NULL, NULL - }, - - { PIXMAN_null }, -}; - -static const pixman_fast_path_t ssse3_fast_paths[] = -{ - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, ssse3_fast_paths); - - imp->iter_info = ssse3_iters; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-timer.c b/vendor/pixman/pixman/pixman-timer.c deleted file mode 100644 index 656d90017..000000000 --- a/vendor/pixman/pixman/pixman-timer.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -#ifdef PIXMAN_TIMERS - -static pixman_timer_t *timers; - -static void -dump_timers (void) -{ - pixman_timer_t *timer; - - for (timer = timers; timer != NULL; timer = timer->next) - { - printf ("%s: total: %llu n: %llu avg: %f\n", - timer->name, - timer->total, - timer->n_times, - timer->total / (double)timer->n_times); - } -} - -void -pixman_timer_register (pixman_timer_t *timer) -{ - static int initialized; - - int atexit (void (*function)(void)); - - if (!initialized) - { - atexit (dump_timers); - initialized = 1; - } - - timer->next = timers; - timers = timer; -} - -#endif diff --git a/vendor/pixman/pixman/pixman-trap.c b/vendor/pixman/pixman/pixman-trap.c deleted file mode 100644 index 0ec73dc65..000000000 --- a/vendor/pixman/pixman/pixman-trap.c +++ /dev/null @@ -1,711 +0,0 @@ -/* - * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -/* - * Compute the smallest value greater than or equal to y which is on a - * grid row. - */ - -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_ceil_y (pixman_fixed_t y, int n) -{ - pixman_fixed_t f = pixman_fixed_frac (y); - pixman_fixed_t i = pixman_fixed_floor (y); - - f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + - Y_FRAC_FIRST (n); - - if (f > Y_FRAC_LAST (n)) - { - if (pixman_fixed_to_int (i) == 0x7fff) - { - f = 0xffff; /* saturate */ - } - else - { - f = Y_FRAC_FIRST (n); - i += pixman_fixed_1; - } - } - return (i | f); -} - -/* - * Compute the largest value strictly less than y which is on a - * grid row. - */ -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_floor_y (pixman_fixed_t y, - int n) -{ - pixman_fixed_t f = pixman_fixed_frac (y); - pixman_fixed_t i = pixman_fixed_floor (y); - - f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + - Y_FRAC_FIRST (n); - - if (f < Y_FRAC_FIRST (n)) - { - if (pixman_fixed_to_int (i) == 0xffff8000) - { - f = 0; /* saturate */ - } - else - { - f = Y_FRAC_LAST (n); - i -= pixman_fixed_1; - } - } - return (i | f); -} - -/* - * Step an edge by any amount (including negative values) - */ -PIXMAN_EXPORT void -pixman_edge_step (pixman_edge_t *e, - int n) -{ - pixman_fixed_48_16_t ne; - - e->x += n * e->stepx; - - ne = e->e + n * (pixman_fixed_48_16_t) e->dx; - - if (n >= 0) - { - if (ne > 0) - { - int nx = (ne + e->dy - 1) / e->dy; - e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; - e->x += nx * e->signdx; - } - } - else - { - if (ne <= -e->dy) - { - int nx = (-ne) / e->dy; - e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; - e->x -= nx * e->signdx; - } - } -} - -/* - * A private routine to initialize the multi-step - * elements of an edge structure - */ -static void -_pixman_edge_multi_init (pixman_edge_t * e, - int n, - pixman_fixed_t *stepx_p, - pixman_fixed_t *dx_p) -{ - pixman_fixed_t stepx; - pixman_fixed_48_16_t ne; - - ne = n * (pixman_fixed_48_16_t) e->dx; - stepx = n * e->stepx; - - if (ne > 0) - { - int nx = ne / e->dy; - ne -= nx * (pixman_fixed_48_16_t)e->dy; - stepx += nx * e->signdx; - } - - *dx_p = ne; - *stepx_p = stepx; -} - -/* - * Initialize one edge structure given the line endpoints and a - * starting y value - */ -PIXMAN_EXPORT void -pixman_edge_init (pixman_edge_t *e, - int n, - pixman_fixed_t y_start, - pixman_fixed_t x_top, - pixman_fixed_t y_top, - pixman_fixed_t x_bot, - pixman_fixed_t y_bot) -{ - pixman_fixed_t dx, dy; - - e->x = x_top; - e->e = 0; - dx = x_bot - x_top; - dy = y_bot - y_top; - e->dy = dy; - e->dx = 0; - - if (dy) - { - if (dx >= 0) - { - e->signdx = 1; - e->stepx = dx / dy; - e->dx = dx % dy; - e->e = -dy; - } - else - { - e->signdx = -1; - e->stepx = -(-dx / dy); - e->dx = -dx % dy; - e->e = 0; - } - - _pixman_edge_multi_init (e, STEP_Y_SMALL (n), - &e->stepx_small, &e->dx_small); - - _pixman_edge_multi_init (e, STEP_Y_BIG (n), - &e->stepx_big, &e->dx_big); - } - pixman_edge_step (e, y_start - y_top); -} - -/* - * Initialize one edge structure given a line, starting y value - * and a pixel offset for the line - */ -PIXMAN_EXPORT void -pixman_line_fixed_edge_init (pixman_edge_t * e, - int n, - pixman_fixed_t y, - const pixman_line_fixed_t *line, - int x_off, - int y_off) -{ - pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); - pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); - const pixman_point_fixed_t *top, *bot; - - if (line->p1.y <= line->p2.y) - { - top = &line->p1; - bot = &line->p2; - } - else - { - top = &line->p2; - bot = &line->p1; - } - - pixman_edge_init (e, n, y, - top->x + x_off_fixed, - top->y + y_off_fixed, - bot->x + x_off_fixed, - bot->y + y_off_fixed); -} - -PIXMAN_EXPORT void -pixman_add_traps (pixman_image_t * image, - int16_t x_off, - int16_t y_off, - int ntrap, - const pixman_trap_t *traps) -{ - int bpp; - int height; - - pixman_fixed_t x_off_fixed; - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; - - _pixman_image_validate (image); - - height = image->bits.height; - bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - x_off_fixed = pixman_int_to_fixed (x_off); - y_off_fixed = pixman_int_to_fixed (y_off); - - while (ntrap--) - { - t = traps->top.y + y_off_fixed; - if (t < 0) - t = 0; - t = pixman_sample_ceil_y (t, bpp); - - b = traps->bot.y + y_off_fixed; - if (pixman_fixed_to_int (b) >= height) - b = pixman_int_to_fixed (height) - 1; - b = pixman_sample_floor_y (b, bpp); - - if (b >= t) - { - /* initialize edge walkers */ - pixman_edge_init (&l, bpp, t, - traps->top.l + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.l + x_off_fixed, - traps->bot.y + y_off_fixed); - - pixman_edge_init (&r, bpp, t, - traps->top.r + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.r + x_off_fixed, - traps->bot.y + y_off_fixed); - - pixman_rasterize_edges (image, &l, &r, t, b); - } - - traps++; - } -} - -#if 0 -static void -dump_image (pixman_image_t *image, - const char * title) -{ - int i, j; - - if (!image->type == BITS) - printf ("%s is not a regular image\n", title); - - if (!image->bits.format == PIXMAN_a8) - printf ("%s is not an alpha mask\n", title); - - printf ("\n\n\n%s: \n", title); - - for (i = 0; i < image->bits.height; ++i) - { - uint8_t *line = - (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); - - for (j = 0; j < image->bits.width; ++j) - printf ("%c", line[j] ? '#' : ' '); - - printf ("\n"); - } -} -#endif - -PIXMAN_EXPORT void -pixman_add_trapezoids (pixman_image_t * image, - int16_t x_off, - int y_off, - int ntraps, - const pixman_trapezoid_t *traps) -{ - int i; - -#if 0 - dump_image (image, "before"); -#endif - - for (i = 0; i < ntraps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (image, trap, x_off, y_off); - } - -#if 0 - dump_image (image, "after"); -#endif -} - -PIXMAN_EXPORT void -pixman_rasterize_trapezoid (pixman_image_t * image, - const pixman_trapezoid_t *trap, - int x_off, - int y_off) -{ - int bpp; - int height; - - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; - - return_if_fail (image->type == BITS); - - _pixman_image_validate (image); - - if (!pixman_trapezoid_valid (trap)) - return; - - height = image->bits.height; - bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - y_off_fixed = pixman_int_to_fixed (y_off); - - t = trap->top + y_off_fixed; - if (t < 0) - t = 0; - t = pixman_sample_ceil_y (t, bpp); - - b = trap->bottom + y_off_fixed; - if (pixman_fixed_to_int (b) >= height) - b = pixman_int_to_fixed (height) - 1; - b = pixman_sample_floor_y (b, bpp); - - if (b >= t) - { - /* initialize edge walkers */ - pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off); - pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off); - - pixman_rasterize_edges (image, &l, &r, t, b); - } -} - -static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] = -{ - FALSE, /* Clear 0 0 */ - FALSE, /* Src 1 0 */ - TRUE, /* Dst 0 1 */ - TRUE, /* Over 1 1-Aa */ - TRUE, /* OverReverse 1-Ab 1 */ - FALSE, /* In Ab 0 */ - FALSE, /* InReverse 0 Aa */ - FALSE, /* Out 1-Ab 0 */ - TRUE, /* OutReverse 0 1-Aa */ - TRUE, /* Atop Ab 1-Aa */ - FALSE, /* AtopReverse 1-Ab Aa */ - TRUE, /* Xor 1-Ab 1-Aa */ - TRUE, /* Add 1 1 */ -}; - -static pixman_bool_t -get_trap_extents (pixman_op_t op, pixman_image_t *dest, - const pixman_trapezoid_t *traps, int n_traps, - pixman_box32_t *box) -{ - int i; - - /* When the operator is such that a zero source has an - * effect on the underlying image, we have to - * composite across the entire destination - */ - if (!zero_src_has_no_effect [op]) - { - box->x1 = 0; - box->y1 = 0; - box->x2 = dest->bits.width; - box->y2 = dest->bits.height; - return TRUE; - } - - box->x1 = INT32_MAX; - box->y1 = INT32_MAX; - box->x2 = INT32_MIN; - box->y2 = INT32_MIN; - - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - int y1, y2; - - if (!pixman_trapezoid_valid (trap)) - continue; - - y1 = pixman_fixed_to_int (trap->top); - if (y1 < box->y1) - box->y1 = y1; - - y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); - if (y2 > box->y2) - box->y2 = y2; - -#define EXTEND_MIN(x) \ - if (pixman_fixed_to_int ((x)) < box->x1) \ - box->x1 = pixman_fixed_to_int ((x)); -#define EXTEND_MAX(x) \ - if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \ - box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); - -#define EXTEND(x) \ - EXTEND_MIN(x); \ - EXTEND_MAX(x); - - EXTEND(trap->left.p1.x); - EXTEND(trap->left.p2.x); - EXTEND(trap->right.p1.x); - EXTEND(trap->right.p2.x); - } - - if (box->x1 >= box->x2 || box->y1 >= box->y2) - return FALSE; - - return TRUE; -} - -/* - * pixman_composite_trapezoids() - * - * All the trapezoids are conceptually rendered to an infinitely big image. - * The (0, 0) coordinates of this image are then aligned with the (x, y) - * coordinates of the source image, and then both images are aligned with - * the (x, y) coordinates of the destination. Then these three images are - * composited across the entire destination. - */ -PIXMAN_EXPORT void -pixman_composite_trapezoids (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_traps, - const pixman_trapezoid_t * traps) -{ - int i; - - return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A); - - if (n_traps <= 0) - return; - - _pixman_image_validate (src); - _pixman_image_validate (dst); - - if (op == PIXMAN_OP_ADD && - (src->common.flags & FAST_PATH_IS_OPAQUE) && - (mask_format == dst->common.extended_format_code) && - !(dst->common.have_clip_region)) - { - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); - } - } - else - { - pixman_image_t *tmp; - pixman_box32_t box; - int i; - - if (!get_trap_extents (op, dst, traps, n_traps, &box)) - return; - - if (!(tmp = pixman_image_create_bits ( - mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1))) - return; - - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); - } - - pixman_image_composite (op, src, tmp, dst, - x_src + box.x1, y_src + box.y1, - 0, 0, - x_dst + box.x1, y_dst + box.y1, - box.x2 - box.x1, box.y2 - box.y1); - - pixman_image_unref (tmp); - } -} - -static int -greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) -{ - if (a->y == b->y) - return a->x > b->x; - return a->y > b->y; -} - -/* - * Note that the definition of this function is a bit odd because - * of the X coordinate space (y increasing downwards). - */ -static int -clockwise (const pixman_point_fixed_t *ref, - const pixman_point_fixed_t *a, - const pixman_point_fixed_t *b) -{ - pixman_point_fixed_t ad, bd; - - ad.x = a->x - ref->x; - ad.y = a->y - ref->y; - bd.x = b->x - ref->x; - bd.y = b->y - ref->y; - - return ((pixman_fixed_32_32_t) bd.y * ad.x - - (pixman_fixed_32_32_t) ad.y * bd.x) < 0; -} - -static void -triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) -{ - const pixman_point_fixed_t *top, *left, *right, *tmp; - - top = &tri->p1; - left = &tri->p2; - right = &tri->p3; - - if (greater_y (top, left)) - { - tmp = left; - left = top; - top = tmp; - } - - if (greater_y (top, right)) - { - tmp = right; - right = top; - top = tmp; - } - - if (clockwise (top, right, left)) - { - tmp = right; - right = left; - left = tmp; - } - - /* - * Two cases: - * - * + + - * / \ / \ - * / \ / \ - * / + + \ - * / -- -- \ - * / -- -- \ - * / --- --- \ - * +-- --+ - */ - - traps->top = top->y; - traps->left.p1 = *top; - traps->left.p2 = *left; - traps->right.p1 = *top; - traps->right.p2 = *right; - - if (right->y < left->y) - traps->bottom = right->y; - else - traps->bottom = left->y; - - traps++; - - *traps = *(traps - 1); - - if (right->y < left->y) - { - traps->top = right->y; - traps->bottom = left->y; - traps->right.p1 = *right; - traps->right.p2 = *left; - } - else - { - traps->top = left->y; - traps->bottom = right->y; - traps->left.p1 = *left; - traps->left.p2 = *right; - } -} - -static pixman_trapezoid_t * -convert_triangles (int n_tris, const pixman_triangle_t *tris) -{ - pixman_trapezoid_t *traps; - int i; - - if (n_tris <= 0) - return NULL; - - traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); - if (!traps) - return NULL; - - for (i = 0; i < n_tris; ++i) - triangle_to_trapezoids (&(tris[i]), traps + 2 * i); - - return traps; -} - -PIXMAN_EXPORT void -pixman_composite_triangles (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_tris, - const pixman_triangle_t * tris) -{ - pixman_trapezoid_t *traps; - - if ((traps = convert_triangles (n_tris, tris))) - { - pixman_composite_trapezoids (op, src, dst, mask_format, - x_src, y_src, x_dst, y_dst, - n_tris * 2, traps); - - free (traps); - } -} - -PIXMAN_EXPORT void -pixman_add_triangles (pixman_image_t *image, - int32_t x_off, - int32_t y_off, - int n_tris, - const pixman_triangle_t *tris) -{ - pixman_trapezoid_t *traps; - - if ((traps = convert_triangles (n_tris, tris))) - { - pixman_add_trapezoids (image, x_off, y_off, - n_tris * 2, traps); - - free (traps); - } -} diff --git a/vendor/pixman/pixman/pixman-utils.c b/vendor/pixman/pixman/pixman-utils.c deleted file mode 100644 index 8c57b0bfa..000000000 --- a/vendor/pixman/pixman/pixman-utils.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 1999 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include - -#include "pixman-private.h" - -pixman_bool_t -_pixman_multiply_overflows_size (size_t a, size_t b) -{ - return a >= SIZE_MAX / b; -} - -pixman_bool_t -_pixman_multiply_overflows_int (unsigned int a, unsigned int b) -{ - return a >= INT32_MAX / b; -} - -pixman_bool_t -_pixman_addition_overflows_int (unsigned int a, unsigned int b) -{ - return a > INT32_MAX - b; -} - -void * -pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c) -{ - if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c) - return NULL; - - return malloc (a * b + c); -} - -void * -pixman_malloc_ab (unsigned int a, - unsigned int b) -{ - if (a >= INT32_MAX / b) - return NULL; - - return malloc (a * b); -} - -void * -pixman_malloc_abc (unsigned int a, - unsigned int b, - unsigned int c) -{ - if (a >= INT32_MAX / b) - return NULL; - else if (a * b >= INT32_MAX / c) - return NULL; - else - return malloc (a * b * c); -} - -static force_inline uint16_t -float_to_unorm (float f, int n_bits) -{ - uint32_t u; - - if (f > 1.0) - f = 1.0; - if (f < 0.0) - f = 0.0; - - u = f * (1 << n_bits); - u -= (u >> n_bits); - - return u; -} - -static force_inline float -unorm_to_float (uint16_t u, int n_bits) -{ - uint32_t m = ((1 << n_bits) - 1); - - return (u & m) * (1.f / (float)m); -} - -/* - * This function expands images from a8r8g8b8 to argb_t. To preserve - * precision, it needs to know from which source format the a8r8g8b8 pixels - * originally came. - * - * For example, if the source was PIXMAN_x1r5g5b5 and the red component - * contained bits 12345, then the 8-bit value is 12345123. To correctly - * expand this to floating point, it should be 12345 / 31.0 and not - * 12345123 / 255.0. - */ -void -pixman_expand_to_float (argb_t *dst, - const uint32_t *src, - pixman_format_code_t format, - int width) -{ - static const float multipliers[16] = { - 0.0f, - 1.0f / ((1 << 1) - 1), - 1.0f / ((1 << 2) - 1), - 1.0f / ((1 << 3) - 1), - 1.0f / ((1 << 4) - 1), - 1.0f / ((1 << 5) - 1), - 1.0f / ((1 << 6) - 1), - 1.0f / ((1 << 7) - 1), - 1.0f / ((1 << 8) - 1), - 1.0f / ((1 << 9) - 1), - 1.0f / ((1 << 10) - 1), - 1.0f / ((1 << 11) - 1), - 1.0f / ((1 << 12) - 1), - 1.0f / ((1 << 13) - 1), - 1.0f / ((1 << 14) - 1), - 1.0f / ((1 << 15) - 1), - }; - int a_size, r_size, g_size, b_size; - int a_shift, r_shift, g_shift, b_shift; - float a_mul, r_mul, g_mul, b_mul; - uint32_t a_mask, r_mask, g_mask, b_mask; - int i; - - if (!PIXMAN_FORMAT_VIS (format)) - format = PIXMAN_a8r8g8b8; - - /* - * Determine the sizes of each component and the masks and shifts - * required to extract them from the source pixel. - */ - a_size = PIXMAN_FORMAT_A (format); - r_size = PIXMAN_FORMAT_R (format); - g_size = PIXMAN_FORMAT_G (format); - b_size = PIXMAN_FORMAT_B (format); - - a_shift = 32 - a_size; - r_shift = 24 - r_size; - g_shift = 16 - g_size; - b_shift = 8 - b_size; - - a_mask = ((1 << a_size) - 1); - r_mask = ((1 << r_size) - 1); - g_mask = ((1 << g_size) - 1); - b_mask = ((1 << b_size) - 1); - - a_mul = multipliers[a_size]; - r_mul = multipliers[r_size]; - g_mul = multipliers[g_size]; - b_mul = multipliers[b_size]; - - /* Start at the end so that we can do the expansion in place - * when src == dst - */ - for (i = width - 1; i >= 0; i--) - { - const uint32_t pixel = src[i]; - - dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f; - dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul; - dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul; - dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul; - } -} - -uint16_t -pixman_float_to_unorm (float f, int n_bits) -{ - return float_to_unorm (f, n_bits); -} - -float -pixman_unorm_to_float (uint16_t u, int n_bits) -{ - return unorm_to_float (u, n_bits); -} - -void -pixman_contract_from_float (uint32_t *dst, - const argb_t *src, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t a, r, g, b; - - a = float_to_unorm (src[i].a, 8); - r = float_to_unorm (src[i].r, 8); - g = float_to_unorm (src[i].g, 8); - b = float_to_unorm (src[i].b, 8); - - dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0); - } -} - -uint32_t * -_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) -{ - return iter->buffer; -} - -void -_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ - pixman_image_t *image = iter->image; - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; - iter->stride = s; -} - -#define N_TMP_BOXES (16) - -pixman_bool_t -pixman_region16_copy_from_region32 (pixman_region16_t *dst, - pixman_region32_t *src) -{ - int n_boxes, i; - pixman_box32_t *boxes32; - pixman_box16_t *boxes16; - pixman_bool_t retval; - - boxes32 = pixman_region32_rectangles (src, &n_boxes); - - boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t)); - - if (!boxes16) - return FALSE; - - for (i = 0; i < n_boxes; ++i) - { - boxes16[i].x1 = boxes32[i].x1; - boxes16[i].y1 = boxes32[i].y1; - boxes16[i].x2 = boxes32[i].x2; - boxes16[i].y2 = boxes32[i].y2; - } - - pixman_region_fini (dst); - retval = pixman_region_init_rects (dst, boxes16, n_boxes); - free (boxes16); - return retval; -} - -pixman_bool_t -pixman_region32_copy_from_region16 (pixman_region32_t *dst, - pixman_region16_t *src) -{ - int n_boxes, i; - pixman_box16_t *boxes16; - pixman_box32_t *boxes32; - pixman_box32_t tmp_boxes[N_TMP_BOXES]; - pixman_bool_t retval; - - boxes16 = pixman_region_rectangles (src, &n_boxes); - - if (n_boxes > N_TMP_BOXES) - boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t)); - else - boxes32 = tmp_boxes; - - if (!boxes32) - return FALSE; - - for (i = 0; i < n_boxes; ++i) - { - boxes32[i].x1 = boxes16[i].x1; - boxes32[i].y1 = boxes16[i].y1; - boxes32[i].x2 = boxes16[i].x2; - boxes32[i].y2 = boxes16[i].y2; - } - - pixman_region32_fini (dst); - retval = pixman_region32_init_rects (dst, boxes32, n_boxes); - - if (boxes32 != tmp_boxes) - free (boxes32); - - return retval; -} - -/* This function is exported for the sake of the test suite and not part - * of the ABI. - */ -PIXMAN_EXPORT pixman_implementation_t * -_pixman_internal_only_get_implementation (void) -{ - return get_implementation (); -} - -void -_pixman_log_error (const char *function, const char *message) -{ - static int n_messages = 0; - - if (n_messages < 10) - { - fprintf (stderr, - "*** BUG ***\n" - "In %s: %s\n" - "Set a breakpoint on '_pixman_log_error' to debug\n\n", - function, message); - - n_messages++; - } -} diff --git a/vendor/pixman/pixman/pixman-version.h.in b/vendor/pixman/pixman/pixman-version.h.in deleted file mode 100644 index 64778a595..000000000 --- a/vendor/pixman/pixman/pixman-version.h.in +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright © 2008 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Author: Carl D. Worth - */ - -#ifndef PIXMAN_VERSION_H__ -#define PIXMAN_VERSION_H__ - -#ifndef PIXMAN_H__ -# error pixman-version.h should only be included by pixman.h -#endif - -#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ -#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ -#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ - -#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" - -#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ - ((major) * 10000) \ - + ((minor) * 100) \ - + ((micro) * 1)) - -#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ - PIXMAN_VERSION_MAJOR, \ - PIXMAN_VERSION_MINOR, \ - PIXMAN_VERSION_MICRO) - -#ifndef PIXMAN_API -# define PIXMAN_API -#endif - -#endif /* PIXMAN_VERSION_H__ */ diff --git a/vendor/pixman/pixman/pixman-vmx.c b/vendor/pixman/pixman/pixman-vmx.c deleted file mode 100644 index 1086b285d..000000000 --- a/vendor/pixman/pixman/pixman-vmx.c +++ /dev/null @@ -1,3159 +0,0 @@ -/* - * Copyright © 2007 Luca Barbato - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Luca Barbato not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Luca Barbato makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Luca Barbato (lu_zero@gentoo.org) - * - * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-inlines.h" -#include - -#define AVV(x...) {x} - -static vector unsigned int mask_ff000000; -static vector unsigned int mask_red; -static vector unsigned int mask_green; -static vector unsigned int mask_blue; -static vector unsigned int mask_565_fix_rb; -static vector unsigned int mask_565_fix_g; - -static force_inline vector unsigned int -splat_alpha (vector unsigned int pix) -{ -#ifdef WORDS_BIGENDIAN - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, - 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); -#else - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, - 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F)); -#endif -} - -static force_inline vector unsigned int -splat_pixel (vector unsigned int pix) -{ - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, - 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03)); -} - -static force_inline vector unsigned int -pix_multiply (vector unsigned int p, vector unsigned int a) -{ - vector unsigned short hi, lo, mod; - - /* unpack to short */ - hi = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)p); -#else - vec_mergeh ((vector unsigned char) p, - (vector unsigned char) AVV (0)); -#endif - - mod = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)a); -#else - vec_mergeh ((vector unsigned char) a, - (vector unsigned char) AVV (0)); -#endif - - hi = vec_mladd (hi, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - - hi = vec_sr (hi, vec_splat_u16 (8)); - - /* unpack to short */ - lo = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)p); -#else - vec_mergel ((vector unsigned char) p, - (vector unsigned char) AVV (0)); -#endif - - mod = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)a); -#else - vec_mergel ((vector unsigned char) a, - (vector unsigned char) AVV (0)); -#endif - - lo = vec_mladd (lo, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - - lo = vec_sr (lo, vec_splat_u16 (8)); - - return (vector unsigned int)vec_packsu (hi, lo); -} - -static force_inline vector unsigned int -pix_add (vector unsigned int a, vector unsigned int b) -{ - return (vector unsigned int)vec_adds ((vector unsigned char)a, - (vector unsigned char)b); -} - -static force_inline vector unsigned int -pix_add_mul (vector unsigned int x, - vector unsigned int a, - vector unsigned int y, - vector unsigned int b) -{ - vector unsigned int t1, t2; - - t1 = pix_multiply (x, a); - t2 = pix_multiply (y, b); - - return pix_add (t1, t2); -} - -static force_inline vector unsigned int -negate (vector unsigned int src) -{ - return vec_nor (src, src); -} - -/* dest*~srca + src */ -static force_inline vector unsigned int -over (vector unsigned int src, - vector unsigned int srca, - vector unsigned int dest) -{ - vector unsigned char tmp = (vector unsigned char) - pix_multiply (dest, negate (srca)); - - tmp = vec_adds ((vector unsigned char)src, tmp); - return (vector unsigned int)tmp; -} - -/* in == pix_multiply */ -#define in_over(src, srca, mask, dest) \ - over (pix_multiply (src, mask), \ - pix_multiply (srca, mask), dest) - -#ifdef WORDS_BIGENDIAN - -#define COMPUTE_SHIFT_MASK(source) \ - source ## _mask = vec_lvsl (0, source); - -#define COMPUTE_SHIFT_MASKS(dest, source) \ - source ## _mask = vec_lvsl (0, source); - -#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ - mask ## _mask = vec_lvsl (0, mask); \ - source ## _mask = vec_lvsl (0, source); - -#define LOAD_VECTOR(source) \ -do \ -{ \ - vector unsigned char tmp1, tmp2; \ - tmp1 = (typeof(tmp1))vec_ld (0, source); \ - tmp2 = (typeof(tmp2))vec_ld (15, source); \ - v ## source = (typeof(v ## source)) \ - vec_perm (tmp1, tmp2, source ## _mask); \ -} while (0) - -#define LOAD_VECTORS(dest, source) \ -do \ -{ \ - LOAD_VECTOR(source); \ - v ## dest = (typeof(v ## dest))vec_ld (0, dest); \ -} while (0) - -#define LOAD_VECTORSC(dest, source, mask) \ -do \ -{ \ - LOAD_VECTORS(dest, source); \ - LOAD_VECTOR(mask); \ -} while (0) - -#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask -#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask - -#else - -/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op. - * They are defined that way because little endian altivec can do unaligned - * reads natively and have no need for constructing the permutation pattern - * variables. - */ -#define COMPUTE_SHIFT_MASK(source) - -#define COMPUTE_SHIFT_MASKS(dest, source) - -#define COMPUTE_SHIFT_MASKC(dest, source, mask) - -# define LOAD_VECTOR(source) \ - v ## source = (typeof(v ## source))vec_xl(0, source); - -# define LOAD_VECTORS(dest, source) \ - LOAD_VECTOR(source); \ - LOAD_VECTOR(dest); \ - -# define LOAD_VECTORSC(dest, source, mask) \ - LOAD_VECTORS(dest, source); \ - LOAD_VECTOR(mask); \ - -#define DECLARE_SRC_MASK_VAR -#define DECLARE_MASK_MASK_VAR - -#endif /* WORDS_BIGENDIAN */ - -#define LOAD_VECTORSM(dest, source, mask) \ - LOAD_VECTORSC (dest, source, mask); \ - v ## source = pix_multiply (v ## source, \ - splat_alpha (v ## mask)); - -#define STORE_VECTOR(dest) \ - vec_st ((vector unsigned int) v ## dest, 0, dest); - -/* load 4 pixels from a 16-byte boundary aligned address */ -static force_inline vector unsigned int -load_128_aligned (const uint32_t* src) -{ - return *((vector unsigned int *) src); -} - -/* load 4 pixels from a unaligned address */ -static force_inline vector unsigned int -load_128_unaligned (const uint32_t* src) -{ - vector unsigned int vsrc; - DECLARE_SRC_MASK_VAR; - - COMPUTE_SHIFT_MASK (src); - LOAD_VECTOR (src); - - return vsrc; -} - -/* save 4 pixels on a 16-byte boundary aligned address */ -static force_inline void -save_128_aligned (uint32_t* data, - vector unsigned int vdata) -{ - STORE_VECTOR(data) -} - -static force_inline vector unsigned int -create_mask_1x32_128 (const uint32_t *src) -{ - vector unsigned int vsrc; - DECLARE_SRC_MASK_VAR; - - COMPUTE_SHIFT_MASK (src); - LOAD_VECTOR (src); - return vec_splat(vsrc, 0); -} - -static force_inline vector unsigned int -create_mask_32_128 (uint32_t mask) -{ - return create_mask_1x32_128(&mask); -} - -static force_inline vector unsigned int -unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned char lo; - - /* unpack to short */ - lo = (vector unsigned char) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char) data2, - (vector unsigned char) data1); -#else - vec_mergel ((vector unsigned char) data1, - (vector unsigned char) data2); -#endif - - return (vector unsigned int) lo; -} - -static force_inline vector unsigned int -unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned char hi; - - /* unpack to short */ - hi = (vector unsigned char) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char) data2, - (vector unsigned char) data1); -#else - vec_mergeh ((vector unsigned char) data1, - (vector unsigned char) data2); -#endif - - return (vector unsigned int) hi; -} - -static force_inline vector unsigned int -unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned short lo; - - /* unpack to char */ - lo = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned short) data2, - (vector unsigned short) data1); -#else - vec_mergel ((vector unsigned short) data1, - (vector unsigned short) data2); -#endif - - return (vector unsigned int) lo; -} - -static force_inline vector unsigned int -unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned short hi; - - /* unpack to char */ - hi = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned short) data2, - (vector unsigned short) data1); -#else - vec_mergeh ((vector unsigned short) data1, - (vector unsigned short) data2); -#endif - - return (vector unsigned int) hi; -} - -static force_inline void -unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2, - vector unsigned int* data_lo, vector unsigned int* data_hi) -{ - *data_lo = unpacklo_128_16x8(data1, data2); - *data_hi = unpackhi_128_16x8(data1, data2); -} - -static force_inline void -unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2, - vector unsigned int* data_lo, vector unsigned int* data_hi) -{ - *data_lo = unpacklo_128_8x16(data1, data2); - *data_hi = unpackhi_128_8x16(data1, data2); -} - -static force_inline vector unsigned int -unpack_565_to_8888 (vector unsigned int lo) -{ - vector unsigned int r, g, b, rb, t; - - r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red); - g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green); - b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue); - - rb = vec_or (r, b); - t = vec_and (rb, mask_565_fix_rb); - t = vec_sr (t, create_mask_32_128(5)); - rb = vec_or (rb, t); - - t = vec_and (g, mask_565_fix_g); - t = vec_sr (t, create_mask_32_128(6)); - g = vec_or (g, t); - - return vec_or (rb, g); -} - -static force_inline int -is_opaque (vector unsigned int x) -{ - uint32_t cmp_result; - vector bool int ffs = vec_cmpeq(x, x); - - cmp_result = vec_all_eq(x, ffs); - - return (cmp_result & 0x8888) == 0x8888; -} - -static force_inline int -is_zero (vector unsigned int x) -{ - uint32_t cmp_result; - - cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); - - return cmp_result == 0xffff; -} - -static force_inline int -is_transparent (vector unsigned int x) -{ - uint32_t cmp_result; - - cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); - return (cmp_result & 0x8888) == 0x8888; -} - -static force_inline uint32_t -core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst) -{ - uint32_t a; - - a = ALPHA_8(src); - - if (a == 0xff) - { - return src; - } - else if (src) - { - UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src); - } - - return dst; -} - -static force_inline uint32_t -combine1 (const uint32_t *ps, const uint32_t *pm) -{ - uint32_t s = *ps; - - if (pm) - UN8x4_MUL_UN8(s, ALPHA_8(*pm)); - - return s; -} - -static force_inline vector unsigned int -combine4 (const uint32_t* ps, const uint32_t* pm) -{ - vector unsigned int src, msk; - - if (pm) - { - msk = load_128_unaligned(pm); - - if (is_transparent(msk)) - return (vector unsigned int) AVV(0); - } - - src = load_128_unaligned(ps); - - if (pm) - src = pix_multiply(src, msk); - - return src; -} - -static void -vmx_combine_over_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - - LOAD_VECTORS (dest, src); - - vdest = over (vsrc, splat_alpha (vsrc), vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - - dest[i] = d; - } -} - -static void -vmx_combine_over_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t ia; - - UN8x4_MUL_UN8 (s, m); - - ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = over (vsrc, splat_alpha (vsrc), vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia; - - UN8x4_MUL_UN8 (s, m); - - ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - dest[i] = d; - } -} - -static void -vmx_combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_over_u_mask (dest, src, mask, width); - else - vmx_combine_over_u_no_mask (dest, src, width); -} - -static void -vmx_combine_over_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - - LOAD_VECTORS (dest, src); - - vdest = over (vdest, splat_alpha (vdest), vsrc); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = ALPHA_8 (~dest[i]); - - UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); - dest[i] = s; - } -} - -static void -vmx_combine_over_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8 (s, m); - - UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - - LOAD_VECTORSM (dest, src, mask); - - vdest = over (vdest, splat_alpha (vdest), vsrc); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = ALPHA_8 (~dest[i]); - - UN8x4_MUL_UN8 (s, m); - - UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); - dest[i] = s; - } -} - -static void -vmx_combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_over_reverse_u_mask (dest, src, mask, width); - else - vmx_combine_over_reverse_u_no_mask (dest, src, width); -} - -static void -vmx_combine_in_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t a = ALPHA_8 (*dest); - - UN8x4_MUL_UN8 (s, a); - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_multiply (vsrc, splat_alpha (vdest)); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t a = ALPHA_8 (dest[i]); - - UN8x4_MUL_UN8 (s, a); - dest[i] = s; - } -} - -static void -vmx_combine_in_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t a = ALPHA_8 (*dest); - - UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_multiply (vsrc, splat_alpha (vdest)); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t a = ALPHA_8 (dest[i]); - - UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); - - dest[i] = s; - } -} - -static void -vmx_combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_in_u_mask (dest, src, mask, width); - else - vmx_combine_in_u_no_mask (dest, src, width); -} - -static void -vmx_combine_in_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t d = *dest; - uint32_t a = ALPHA_8 (*src++); - - UN8x4_MUL_UN8 (d, a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_multiply (vdest, splat_alpha (vsrc)); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t d = dest[i]; - uint32_t a = ALPHA_8 (src[i]); - - UN8x4_MUL_UN8 (d, a); - - dest[i] = d; - } -} - -static void -vmx_combine_in_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t d = *dest; - uint32_t a = *src++; - - UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (a); - UN8x4_MUL_UN8 (d, a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_multiply (vdest, splat_alpha (vsrc)); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t d = dest[i]; - uint32_t a = src[i]; - - UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (a); - UN8x4_MUL_UN8 (d, a); - - dest[i] = d; - } -} - -static void -vmx_combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_in_reverse_u_mask (dest, src, mask, width); - else - vmx_combine_in_reverse_u_no_mask (dest, src, width); -} - -static void -vmx_combine_out_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t a = ALPHA_8 (~(*dest)); - - UN8x4_MUL_UN8 (s, a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t a = ALPHA_8 (~dest[i]); - - UN8x4_MUL_UN8 (s, a); - - dest[i] = s; - } -} - -static void -vmx_combine_out_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t a = ALPHA_8 (~(*dest)); - - UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t a = ALPHA_8 (~dest[i]); - - UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); - - dest[i] = s; - } -} - -static void -vmx_combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_out_u_mask (dest, src, mask, width); - else - vmx_combine_out_u_no_mask (dest, src, width); -} - -static void -vmx_combine_out_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t d = *dest; - uint32_t a = ALPHA_8 (~(*src++)); - - UN8x4_MUL_UN8 (d, a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - - LOAD_VECTORS (dest, src); - - vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t d = dest[i]; - uint32_t a = ALPHA_8 (~src[i]); - - UN8x4_MUL_UN8 (d, a); - - dest[i] = d; - } -} - -static void -vmx_combine_out_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t d = *dest; - uint32_t a = *src++; - - UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (~a); - UN8x4_MUL_UN8 (d, a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t d = dest[i]; - uint32_t a = src[i]; - - UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (~a); - UN8x4_MUL_UN8 (d, a); - - dest[i] = d; - } -} - -static void -vmx_combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_out_reverse_u_mask (dest, src, mask, width); - else - vmx_combine_out_reverse_u_no_mask (dest, src, width); -} - -static void -vmx_combine_atop_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t dest_a = ALPHA_8 (d); - uint32_t src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t dest_a = ALPHA_8 (d); - uint32_t src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - - dest[i] = s; - } -} - -static void -vmx_combine_atop_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t dest_a = ALPHA_8 (d); - uint32_t src_ia; - - UN8x4_MUL_UN8 (s, m); - - src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t dest_a = ALPHA_8 (d); - uint32_t src_ia; - - UN8x4_MUL_UN8 (s, m); - - src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - - dest[i] = s; - } -} - -static void -vmx_combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_atop_u_mask (dest, src, mask, width); - else - vmx_combine_atop_u_no_mask (dest, src, width); -} - -static void -vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_a = ALPHA_8 (s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a = ALPHA_8 (s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - - dest[i] = s; - } -} - -static void -vmx_combine_atop_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_a; - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8 (s, m); - - src_a = ALPHA_8 (s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a; - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8 (s, m); - - src_a = ALPHA_8 (s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - - dest[i] = s; - } -} - -static void -vmx_combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_atop_reverse_u_mask (dest, src, mask, width); - else - vmx_combine_atop_reverse_u_no_mask (dest, src, width); -} - -static void -vmx_combine_xor_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_ia = ALPHA_8 (~s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia = ALPHA_8 (~s); - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - - dest[i] = s; - } -} - -static void -vmx_combine_xor_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_ia; - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8 (s, m); - - src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia; - uint32_t dest_ia = ALPHA_8 (~d); - - UN8x4_MUL_UN8 (s, m); - - src_ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - - dest[i] = s; - } -} - -static void -vmx_combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_xor_u_mask (dest, src, mask, width); - else - vmx_combine_xor_u_no_mask (dest, src, width); -} - -static void -vmx_combine_add_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) -{ - int i; - vector unsigned int vdest, vsrc; - DECLARE_SRC_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t s = *src++; - uint32_t d = *dest; - - UN8x4_ADD_UN8x4 (d, s); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKS (dest, src); - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORS (dest, src); - - vdest = pix_add (vsrc, vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t s = src[i]; - uint32_t d = dest[i]; - - UN8x4_ADD_UN8x4 (d, s); - - dest[i] = d; - } -} - -static void -vmx_combine_add_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; - - UN8x4_MUL_UN8 (s, m); - UN8x4_ADD_UN8x4 (d, s); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSM (dest, src, mask); - - vdest = pix_add (vsrc, vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - - UN8x4_MUL_UN8 (s, m); - UN8x4_ADD_UN8x4 (d, s); - - dest[i] = d; - } -} - -static void -vmx_combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - if (mask) - vmx_combine_add_u_mask (dest, src, mask, width); - else - vmx_combine_add_u_no_mask (dest, src, width); -} - -static void -vmx_combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - - UN8x4_MUL_UN8x4 (s, a); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply (vsrc, vmask); - - STORE_VECTOR (dest); - - mask += 4; - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - - UN8x4_MUL_UN8x4 (s, a); - - dest[i] = s; - } -} - -static void -vmx_combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (s); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); - - STORE_VECTOR (dest); - - mask += 4; - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (s); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); - - dest[i] = d; - } -} - -static void -vmx_combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t ida = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); - - STORE_VECTOR (dest); - - mask += 4; - src += 4; - dest += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ida = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); - - dest[i] = s; - } -} - -static void -vmx_combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t da = ALPHA_8 (*dest); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (s, da); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t da = ALPHA_8 (dest[i]); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (s, da); - - dest[i] = s; - } -} - -static void -vmx_combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (*src++); - - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4 (d, a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (src[i]); - - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4 (d, a); - - dest[i] = d; - } -} - -static void -vmx_combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (s, da); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply ( - pix_multiply (vsrc, vmask), splat_alpha (negate (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (s, da); - - dest[i] = s; - } -} - -static void -vmx_combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (s); - - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4 (d, ~a); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply ( - vdest, negate (pix_multiply (vmask, splat_alpha (vsrc)))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (s); - - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4 (d, ~a); - - dest[i] = d; - } -} - -static void -vmx_combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask, vsrca; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vsrca = splat_alpha (vsrc); - - vsrc = pix_multiply (vsrc, vmask); - vmask = pix_multiply (vmask, vsrca); - - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - negate (vmask), vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); - - dest[i] = d; - } -} - -static void -vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_add_mul (vdest, - pix_multiply (vmask, splat_alpha (vsrc)), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); - - dest[i] = d; - } -} - -static void -vmx_combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); - - *dest++ = d; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_add_mul (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc))), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (~d); - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); - - dest[i] = d; - } -} - -static void -vmx_combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - vector unsigned int vdest, vsrc, vmask; - DECLARE_SRC_MASK_VAR; - DECLARE_MASK_MASK_VAR; - - while (width && ((uintptr_t)dest & 15)) - { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_ADD_UN8x4 (s, d); - - *dest++ = s; - width--; - } - - COMPUTE_SHIFT_MASKC (dest, src, mask); - - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) - { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_add (pix_multiply (vsrc, vmask), vdest); - - STORE_VECTOR (dest); - - src += 4; - dest += 4; - mask += 4; - } - - for (i = width % 4; --i >= 0;) - { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - - UN8x4_MUL_UN8x4 (s, a); - UN8x4_ADD_UN8x4 (s, d); - - dest[i] = s; - } -} - -static void -vmx_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; - uint32_t *dst_line, *dst; - uint8_t *mask_line; - int dst_stride, mask_stride; - int32_t w; - uint32_t m, d, s, ia; - - vector unsigned int vsrc, valpha, vmask, vdst; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = ALPHA_8(src); - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - vsrc = (vector unsigned int) {src, src, src, src}; - valpha = splat_alpha(vsrc); - - while (height--) - { - const uint8_t *pm = mask_line; - dst = dst_line; - dst_line += dst_stride; - mask_line += mask_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - s = src; - m = *pm++; - - if (m) - { - d = *dst; - UN8x4_MUL_UN8 (s, m); - ia = ALPHA_8 (~s); - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *dst = d; - } - - w--; - dst++; - } - - while (w >= 4) - { - m = *((uint32_t*)pm); - - if (srca == 0xff && m == 0xffffffff) - { - save_128_aligned(dst, vsrc); - } - else if (m) - { - vmask = splat_pixel((vector unsigned int) {m, m, m, m}); - - /* dst is 16-byte aligned */ - vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst)); - - save_128_aligned(dst, vdst); - } - - w -= 4; - dst += 4; - pm += 4; - } - - while (w) - { - s = src; - m = *pm++; - - if (m) - { - d = *dst; - UN8x4_MUL_UN8 (s, m); - ia = ALPHA_8 (~s); - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *dst = d; - } - - w--; - dst++; - } - } - -} - -static pixman_bool_t -vmx_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - uint32_t byte_width; - uint8_t *byte_line; - - vector unsigned int vfiller; - - if (bpp == 8) - { - uint8_t b; - uint16_t w; - - stride = stride * (int) sizeof (uint32_t) / 1; - byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); - byte_width = width; - stride *= 1; - - b = filler & 0xff; - w = (b << 8) | b; - filler = (w << 16) | w; - } - else if (bpp == 16) - { - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); - byte_width = 2 * width; - stride *= 2; - - filler = (filler & 0xffff) * 0x00010001; - } - else if (bpp == 32) - { - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); - byte_width = 4 * width; - stride *= 4; - } - else - { - return FALSE; - } - - vfiller = create_mask_1x32_128(&filler); - - while (height--) - { - int w; - uint8_t *d = byte_line; - byte_line += stride; - w = byte_width; - - if (w >= 1 && ((uintptr_t)d & 1)) - { - *(uint8_t *)d = filler; - w -= 1; - d += 1; - } - - while (w >= 2 && ((uintptr_t)d & 3)) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - - while (w >= 4 && ((uintptr_t)d & 15)) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - - while (w >= 128) - { - vec_st(vfiller, 0, (uint32_t *) d); - vec_st(vfiller, 0, (uint32_t *) d + 4); - vec_st(vfiller, 0, (uint32_t *) d + 8); - vec_st(vfiller, 0, (uint32_t *) d + 12); - vec_st(vfiller, 0, (uint32_t *) d + 16); - vec_st(vfiller, 0, (uint32_t *) d + 20); - vec_st(vfiller, 0, (uint32_t *) d + 24); - vec_st(vfiller, 0, (uint32_t *) d + 28); - - d += 128; - w -= 128; - } - - if (w >= 64) - { - vec_st(vfiller, 0, (uint32_t *) d); - vec_st(vfiller, 0, (uint32_t *) d + 4); - vec_st(vfiller, 0, (uint32_t *) d + 8); - vec_st(vfiller, 0, (uint32_t *) d + 12); - - d += 64; - w -= 64; - } - - if (w >= 32) - { - vec_st(vfiller, 0, (uint32_t *) d); - vec_st(vfiller, 0, (uint32_t *) d + 4); - - d += 32; - w -= 32; - } - - if (w >= 16) - { - vec_st(vfiller, 0, (uint32_t *) d); - - d += 16; - w -= 16; - } - - while (w >= 4) - { - *(uint32_t *)d = filler; - - w -= 4; - d += 4; - } - - if (w >= 2) - { - *(uint16_t *)d = filler; - w -= 2; - d += 2; - } - - if (w >= 1) - { - *(uint8_t *)d = filler; - w -= 1; - d += 1; - } - } - - return TRUE; -} - -static void -vmx_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int32_t w; - int dst_stride, src_stride; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w && (uintptr_t)dst & 15) - { - *dst++ = *src++ | 0xff000000; - w--; - } - - while (w >= 16) - { - vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4; - - vmx_src1 = load_128_unaligned (src); - vmx_src2 = load_128_unaligned (src + 4); - vmx_src3 = load_128_unaligned (src + 8); - vmx_src4 = load_128_unaligned (src + 12); - - save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000)); - save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000)); - save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000)); - save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000)); - - dst += 16; - src += 16; - w -= 16; - } - - while (w) - { - *dst++ = *src++ | 0xff000000; - w--; - } - } -} - -static void -vmx_composite_over_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t src, ia; - int i, w, dst_stride; - vector unsigned int vdst, vsrc, via; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - vsrc = (vector unsigned int){src, src, src, src}; - via = negate (splat_alpha (vsrc)); - ia = ALPHA_8 (~src); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - w = width; - - while (w && ((uintptr_t)dst & 15)) - { - uint32_t d = *dst; - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src); - *dst++ = d; - w--; - } - - for (i = w / 4; i > 0; i--) - { - vdst = pix_multiply (load_128_aligned (dst), via); - save_128_aligned (dst, pix_add (vsrc, vdst)); - dst += 4; - } - - for (i = w % 4; --i >= 0;) - { - uint32_t d = dst[i]; - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src); - dst[i] = d; - } - } -} - -static void -vmx_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - int dst_stride, src_stride; - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - dst = dst_line; - src = src_line; - - while (height--) - { - vmx_combine_over_u (imp, op, dst, src, NULL, width); - - dst += dst_stride; - src += src_stride; - } -} - -static void -vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, ia; - uint32_t *dst_line, d; - uint32_t *mask_line, m; - uint32_t pack_cmp; - int dst_stride, mask_stride; - - vector unsigned int vsrc, valpha, vmask, vdest; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - vsrc = (vector unsigned int) {src, src, src, src}; - valpha = splat_alpha(vsrc); - ia = ALPHA_8 (src); - - while (height--) - { - int w = width; - const uint32_t *pm = (uint32_t *)mask_line; - uint32_t *pd = (uint32_t *)dst_line; - uint32_t s; - - dst_line += dst_stride; - mask_line += mask_stride; - - while (w && (uintptr_t)pd & 15) - { - s = src; - m = *pm++; - - if (m) - { - d = *pd; - UN8x4_MUL_UN8x4 (s, m); - UN8x4_MUL_UN8 (m, ia); - m = ~m; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s); - *pd = d; - } - - pd++; - w--; - } - - while (w >= 4) - { - /* pm is NOT necessarily 16-byte aligned */ - vmask = load_128_unaligned (pm); - - pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0)); - - /* if all bits in mask are zero, pack_cmp is not 0 */ - if (pack_cmp == 0) - { - /* pd is 16-byte aligned */ - vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd)); - - save_128_aligned(pd, vdest); - } - - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - s = src; - m = *pm++; - - if (m) - { - d = *pd; - UN8x4_MUL_UN8x4 (s, m); - UN8x4_MUL_UN8 (m, ia); - m = ~m; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s); - *pd = d; - } - - pd++; - w--; - } - } -} - -static void -vmx_composite_add_8_8 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - src = src_line; - - dst_line += dst_stride; - src_line += src_stride; - w = width; - - /* Small head */ - while (w && (uintptr_t)dst & 3) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } - - vmx_combine_add_u (imp, op, - (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); - - /* Small tail */ - dst += w & 0xfffc; - src += w & 0xfffc; - - w &= 3; - - while (w) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } - } -} - -static void -vmx_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - - vmx_combine_add_u (imp, op, dst, src, NULL, width); - } -} - -static force_inline void -scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd, - const uint32_t* ps, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t src_width_fixed, - pixman_bool_t fully_transparent_src) -{ - uint32_t s, d; - const uint32_t* pm = NULL; - - vector unsigned int vsrc, vdst; - - if (fully_transparent_src) - return; - - /* Align dst on a 16-byte boundary */ - while (w && ((uintptr_t)pd & 15)) - { - d = *pd; - s = combine1 (ps + pixman_fixed_to_int (vx), pm); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - *pd++ = core_combine_over_u_pixel_vmx (s, d); - if (pm) - pm++; - w--; - } - - while (w >= 4) - { - vector unsigned int tmp; - uint32_t tmp1, tmp2, tmp3, tmp4; - - tmp1 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp2 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp3 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - tmp4 = *(ps + pixman_fixed_to_int (vx)); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - tmp[0] = tmp1; - tmp[1] = tmp2; - tmp[2] = tmp3; - tmp[3] = tmp4; - - vsrc = combine4 ((const uint32_t *) &tmp, pm); - - if (is_opaque (vsrc)) - { - save_128_aligned (pd, vsrc); - } - else if (!is_zero (vsrc)) - { - vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd)); - - save_128_aligned (pd, vdst); - } - - w -= 4; - pd += 4; - if (pm) - pm += 4; - } - - while (w) - { - d = *pd; - s = combine1 (ps + pixman_fixed_to_int (vx), pm); - vx += unit_x; - while (vx >= 0) - vx -= src_width_fixed; - - *pd++ = core_combine_over_u_pixel_vmx (s, d); - if (pm) - pm++; - - w--; - } -} - -FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER, - scaled_nearest_scanline_vmx_8888_8888_OVER, - uint32_t, uint32_t, COVER) -FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER, - scaled_nearest_scanline_vmx_8888_8888_OVER, - uint32_t, uint32_t, NONE) -FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER, - scaled_nearest_scanline_vmx_8888_8888_OVER, - uint32_t, uint32_t, PAD) -FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER, - scaled_nearest_scanline_vmx_8888_8888_OVER, - uint32_t, uint32_t, NORMAL) - -static const pixman_fast_path_t vmx_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, vmx_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, vmx_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca), - - /* PIXMAN_OP_ADD */ - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888), - - /* PIXMAN_OP_SRC */ - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888), - - { PIXMAN_OP_NONE }, -}; - -static uint32_t * -vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - vector unsigned int ff000000 = mask_ff000000; - uint32_t *dst = iter->buffer; - uint32_t *src = (uint32_t *)iter->bits; - - iter->bits += iter->stride; - - while (w && ((uintptr_t)dst) & 0x0f) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - while (w >= 4) - { - save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000)); - - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - *dst++ = (*src++) | 0xff000000; - w--; - } - - return iter->buffer; -} - -static uint32_t * -vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) -{ - int w = iter->width; - uint32_t *dst = iter->buffer; - uint8_t *src = iter->bits; - vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6; - - iter->bits += iter->stride; - - while (w && (((uintptr_t)dst) & 15)) - { - *dst++ = *(src++) << 24; - w--; - } - - while (w >= 16) - { - vmx0 = load_128_unaligned((uint32_t *) src); - - unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2); - unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4); - unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6); - - save_128_aligned(dst, vmx6); - save_128_aligned((dst + 4), vmx5); - save_128_aligned((dst + 8), vmx4); - save_128_aligned((dst + 12), vmx3); - - dst += 16; - src += 16; - w -= 16; - } - - while (w) - { - *dst++ = *(src++) << 24; - w--; - } - - return iter->buffer; -} - -#define IMAGE_FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - -static const pixman_iter_info_t vmx_iters[] = -{ - { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL - }, - { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, - _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL - }, - { PIXMAN_null }, -}; - -pixman_implementation_t * -_pixman_implementation_create_vmx (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths); - - /* VMX constants */ - mask_ff000000 = create_mask_32_128 (0xff000000); - mask_red = create_mask_32_128 (0x00f80000); - mask_green = create_mask_32_128 (0x0000fc00); - mask_blue = create_mask_32_128 (0x000000f8); - mask_565_fix_rb = create_mask_32_128 (0x00e000e0); - mask_565_fix_g = create_mask_32_128 (0x0000c000); - - /* Set up function pointers */ - - imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u; - imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u; - imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u; - imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u; - imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u; - - imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u; - - imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca; - imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; - imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; - - imp->fill = vmx_fill; - - imp->iter_info = vmx_iters; - - return imp; -} diff --git a/vendor/pixman/pixman/pixman-x86.c b/vendor/pixman/pixman/pixman-x86.c deleted file mode 100644 index 7f4d80e94..000000000 --- a/vendor/pixman/pixman/pixman-x86.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" - -#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) - -/* The CPU detection code needs to be in a file not compiled with - * "-mmmx -msse", as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. - */ - -typedef enum -{ - X86_MMX = (1 << 0), - X86_MMX_EXTENSIONS = (1 << 1), - X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, - X86_SSE2 = (1 << 3), - X86_CMOV = (1 << 4), - X86_SSSE3 = (1 << 5) -} cpu_features_t; - -#ifdef HAVE_GETISAX - -#include - -static cpu_features_t -detect_cpu_features (void) -{ - cpu_features_t features = 0; - unsigned int result = 0; - - if (getisax (&result, 1)) - { - if (result & AV_386_CMOV) - features |= X86_CMOV; - if (result & AV_386_MMX) - features |= X86_MMX; - if (result & AV_386_AMD_MMX) - features |= X86_MMX_EXTENSIONS; - if (result & AV_386_SSE) - features |= X86_SSE; - if (result & AV_386_SSE2) - features |= X86_SSE2; - if (result & AV_386_SSSE3) - features |= X86_SSSE3; - } - - return features; -} - -#else - -#define _PIXMAN_X86_64 \ - (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64)) - -static pixman_bool_t -have_cpuid (void) -{ -#if _PIXMAN_X86_64 || defined (_MSC_VER) - - return TRUE; - -#elif defined (__GNUC__) - uint32_t result; - - __asm__ volatile ( - "pushf" "\n\t" - "pop %%eax" "\n\t" - "mov %%eax, %%ecx" "\n\t" - "xor $0x00200000, %%eax" "\n\t" - "push %%eax" "\n\t" - "popf" "\n\t" - "pushf" "\n\t" - "pop %%eax" "\n\t" - "xor %%ecx, %%eax" "\n\t" - "mov %%eax, %0" "\n\t" - : "=r" (result) - : - : "%eax", "%ecx"); - - return !!result; - -#else -#error "Unknown compiler" -#endif -} - -static void -pixman_cpuid (uint32_t feature, - uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) -{ -#if defined (__GNUC__) - -#if _PIXMAN_X86_64 - __asm__ volatile ( - "cpuid" "\n\t" - : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) - : "a" (feature)); -#else - /* On x86-32 we need to be careful about the handling of %ebx - * and %esp. We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure that %ebx is - * preserved, and that %esp has its original value when - * accessing the output operands. - */ - __asm__ volatile ( - "xchg %%ebx, %1" "\n\t" - "cpuid" "\n\t" - "xchg %%ebx, %1" "\n\t" - : "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d) - : "a" (feature)); -#endif - -#elif defined (_MSC_VER) - int info[4]; - - __cpuid (info, feature); - - *a = info[0]; - *b = info[1]; - *c = info[2]; - *d = info[3]; -#else -#error Unknown compiler -#endif -} - -static cpu_features_t -detect_cpu_features (void) -{ - uint32_t a, b, c, d; - cpu_features_t features = 0; - - if (!have_cpuid()) - return features; - - /* Get feature bits */ - pixman_cpuid (0x01, &a, &b, &c, &d); - if (d & (1 << 15)) - features |= X86_CMOV; - if (d & (1 << 23)) - features |= X86_MMX; - if (d & (1 << 25)) - features |= X86_SSE; - if (d & (1 << 26)) - features |= X86_SSE2; - if (c & (1 << 9)) - features |= X86_SSSE3; - - /* Check for AMD specific features */ - if ((features & X86_MMX) && !(features & X86_SSE)) - { - char vendor[13]; - - /* Get vendor string */ - memset (vendor, 0, sizeof vendor); - - pixman_cpuid (0x00, &a, &b, &c, &d); - memcpy (vendor + 0, &b, 4); - memcpy (vendor + 4, &d, 4); - memcpy (vendor + 8, &c, 4); - - if (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "HygonGenuine") == 0 || - strcmp (vendor, "Geode by NSC") == 0) - { - pixman_cpuid (0x80000000, &a, &b, &c, &d); - if (a >= 0x80000001) - { - pixman_cpuid (0x80000001, &a, &b, &c, &d); - - if (d & (1 << 22)) - features |= X86_MMX_EXTENSIONS; - } - } - } - - return features; -} - -#endif - -static pixman_bool_t -have_feature (cpu_features_t feature) -{ - static pixman_bool_t initialized; - static cpu_features_t features; - - if (!initialized) - { - features = detect_cpu_features(); - initialized = TRUE; - } - - return (features & feature) == feature; -} - -#endif - -pixman_implementation_t * -_pixman_x86_get_implementations (pixman_implementation_t *imp) -{ -#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) -#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) -#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3) - -#ifdef USE_X86_MMX - if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_SSE2 - if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS)) - imp = _pixman_implementation_create_sse2 (imp); -#endif - -#ifdef USE_SSSE3 - if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS)) - imp = _pixman_implementation_create_ssse3 (imp); -#endif - - return imp; -} diff --git a/vendor/pixman/pixman/pixman.c b/vendor/pixman/pixman/pixman.c deleted file mode 100644 index 82ec236a6..000000000 --- a/vendor/pixman/pixman/pixman.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include "pixman-private.h" - -#include - -pixman_implementation_t *global_implementation; - -#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR -static void __attribute__((constructor)) -pixman_constructor (void) -{ - global_implementation = _pixman_choose_implementation (); -} -#endif - -typedef struct operator_info_t operator_info_t; - -struct operator_info_t -{ - uint8_t opaque_info[4]; -}; - -#define PACK(neither, src, dest, both) \ - {{ (uint8_t)PIXMAN_OP_ ## neither, \ - (uint8_t)PIXMAN_OP_ ## src, \ - (uint8_t)PIXMAN_OP_ ## dest, \ - (uint8_t)PIXMAN_OP_ ## both }} - -static const operator_info_t operator_table[] = -{ - /* Neither Opaque Src Opaque Dst Opaque Both Opaque */ - PACK (CLEAR, CLEAR, CLEAR, CLEAR), - PACK (SRC, SRC, SRC, SRC), - PACK (DST, DST, DST, DST), - PACK (OVER, SRC, OVER, SRC), - PACK (OVER_REVERSE, OVER_REVERSE, DST, DST), - PACK (IN, IN, SRC, SRC), - PACK (IN_REVERSE, DST, IN_REVERSE, DST), - PACK (OUT, OUT, CLEAR, CLEAR), - PACK (OUT_REVERSE, CLEAR, OUT_REVERSE, CLEAR), - PACK (ATOP, IN, OVER, SRC), - PACK (ATOP_REVERSE, OVER_REVERSE, IN_REVERSE, DST), - PACK (XOR, OUT, OUT_REVERSE, CLEAR), - PACK (ADD, ADD, ADD, ADD), - PACK (SATURATE, OVER_REVERSE, DST, DST), - - {{ 0 /* 0x0e */ }}, - {{ 0 /* 0x0f */ }}, - - PACK (CLEAR, CLEAR, CLEAR, CLEAR), - PACK (SRC, SRC, SRC, SRC), - PACK (DST, DST, DST, DST), - PACK (DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER), - PACK (DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE), - PACK (DISJOINT_IN, DISJOINT_IN, DISJOINT_IN, DISJOINT_IN), - PACK (DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE), - PACK (DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT), - PACK (DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE), - PACK (DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP), - PACK (DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE), - PACK (DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR), - - {{ 0 /* 0x1c */ }}, - {{ 0 /* 0x1d */ }}, - {{ 0 /* 0x1e */ }}, - {{ 0 /* 0x1f */ }}, - - PACK (CLEAR, CLEAR, CLEAR, CLEAR), - PACK (SRC, SRC, SRC, SRC), - PACK (DST, DST, DST, DST), - PACK (CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER), - PACK (CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE), - PACK (CONJOINT_IN, CONJOINT_IN, CONJOINT_IN, CONJOINT_IN), - PACK (CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE), - PACK (CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT), - PACK (CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE), - PACK (CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP), - PACK (CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE), - PACK (CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR), - - {{ 0 /* 0x2c */ }}, - {{ 0 /* 0x2d */ }}, - {{ 0 /* 0x2e */ }}, - {{ 0 /* 0x2f */ }}, - - PACK (MULTIPLY, MULTIPLY, MULTIPLY, MULTIPLY), - PACK (SCREEN, SCREEN, SCREEN, SCREEN), - PACK (OVERLAY, OVERLAY, OVERLAY, OVERLAY), - PACK (DARKEN, DARKEN, DARKEN, DARKEN), - PACK (LIGHTEN, LIGHTEN, LIGHTEN, LIGHTEN), - PACK (COLOR_DODGE, COLOR_DODGE, COLOR_DODGE, COLOR_DODGE), - PACK (COLOR_BURN, COLOR_BURN, COLOR_BURN, COLOR_BURN), - PACK (HARD_LIGHT, HARD_LIGHT, HARD_LIGHT, HARD_LIGHT), - PACK (SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT), - PACK (DIFFERENCE, DIFFERENCE, DIFFERENCE, DIFFERENCE), - PACK (EXCLUSION, EXCLUSION, EXCLUSION, EXCLUSION), - PACK (HSL_HUE, HSL_HUE, HSL_HUE, HSL_HUE), - PACK (HSL_SATURATION, HSL_SATURATION, HSL_SATURATION, HSL_SATURATION), - PACK (HSL_COLOR, HSL_COLOR, HSL_COLOR, HSL_COLOR), - PACK (HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY), -}; - -/* - * Optimize the current operator based on opacity of source or destination - * The output operator should be mathematically equivalent to the source. - */ -static pixman_op_t -optimize_operator (pixman_op_t op, - uint32_t src_flags, - uint32_t mask_flags, - uint32_t dst_flags) -{ - pixman_bool_t is_source_opaque, is_dest_opaque; - -#define OPAQUE_SHIFT 13 - - COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT)); - - is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE); - is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE); - - is_dest_opaque >>= OPAQUE_SHIFT - 1; - is_source_opaque >>= OPAQUE_SHIFT; - - return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque]; -} - -/* - * Computing composite region - */ -static inline pixman_bool_t -clip_general_image (pixman_region32_t * region, - pixman_region32_t * clip, - int dx, - int dy) -{ - if (pixman_region32_n_rects (region) == 1 && - pixman_region32_n_rects (clip) == 1) - { - pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL); - pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL); - int v; - - if (rbox->x1 < (v = cbox->x1 + dx)) - rbox->x1 = v; - if (rbox->x2 > (v = cbox->x2 + dx)) - rbox->x2 = v; - if (rbox->y1 < (v = cbox->y1 + dy)) - rbox->y1 = v; - if (rbox->y2 > (v = cbox->y2 + dy)) - rbox->y2 = v; - if (rbox->x1 >= rbox->x2 || rbox->y1 >= rbox->y2) - { - pixman_region32_init (region); - return FALSE; - } - } - else if (pixman_region32_empty (clip)) - { - return FALSE; - } - else - { - if (dx || dy) - pixman_region32_translate (region, -dx, -dy); - - if (!pixman_region32_intersect (region, region, clip)) - return FALSE; - - if (dx || dy) - pixman_region32_translate (region, dx, dy); - } - - return pixman_region32_not_empty (region); -} - -static inline pixman_bool_t -clip_source_image (pixman_region32_t * region, - pixman_image_t * image, - int dx, - int dy) -{ - /* Source clips are ignored, unless they are explicitly turned on - * and the clip in question was set by an X client. (Because if - * the clip was not set by a client, then it is a hierarchy - * clip and those should always be ignored for sources). - */ - if (!image->common.clip_sources || !image->common.client_clip) - return TRUE; - - return clip_general_image (region, - &image->common.clip_region, - dx, dy); -} - -/* - * returns FALSE if the final region is empty. Indistinguishable from - * an allocation failure, but rendering ignores those anyways. - */ -pixman_bool_t -_pixman_compute_composite_region32 (pixman_region32_t * region, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - region->extents.x1 = dest_x; - region->extents.x2 = dest_x + width; - region->extents.y1 = dest_y; - region->extents.y2 = dest_y + height; - - region->extents.x1 = MAX (region->extents.x1, 0); - region->extents.y1 = MAX (region->extents.y1, 0); - region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width); - region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height); - - region->data = 0; - - /* Check for empty operation */ - if (region->extents.x1 >= region->extents.x2 || - region->extents.y1 >= region->extents.y2) - { - region->extents.x1 = 0; - region->extents.x2 = 0; - region->extents.y1 = 0; - region->extents.y2 = 0; - return FALSE; - } - - if (dest_image->common.have_clip_region) - { - if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0)) - return FALSE; - } - - if (dest_image->common.alpha_map) - { - if (!pixman_region32_intersect_rect (region, region, - dest_image->common.alpha_origin_x, - dest_image->common.alpha_origin_y, - dest_image->common.alpha_map->width, - dest_image->common.alpha_map->height)) - { - return FALSE; - } - if (pixman_region32_empty (region)) - return FALSE; - if (dest_image->common.alpha_map->common.have_clip_region) - { - if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region, - -dest_image->common.alpha_origin_x, - -dest_image->common.alpha_origin_y)) - { - return FALSE; - } - } - } - - /* clip against src */ - if (src_image->common.have_clip_region) - { - if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y)) - return FALSE; - } - if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region) - { - if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map, - dest_x - (src_x - src_image->common.alpha_origin_x), - dest_y - (src_y - src_image->common.alpha_origin_y))) - { - return FALSE; - } - } - /* clip against mask */ - if (mask_image && mask_image->common.have_clip_region) - { - if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y)) - return FALSE; - - if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region) - { - if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map, - dest_x - (mask_x - mask_image->common.alpha_origin_x), - dest_y - (mask_y - mask_image->common.alpha_origin_y))) - { - return FALSE; - } - } - } - - return TRUE; -} - -typedef struct box_48_16 box_48_16_t; - -struct box_48_16 -{ - pixman_fixed_48_16_t x1; - pixman_fixed_48_16_t y1; - pixman_fixed_48_16_t x2; - pixman_fixed_48_16_t y2; -}; - -static pixman_bool_t -compute_transformed_extents (pixman_transform_t *transform, - const pixman_box32_t *extents, - box_48_16_t *transformed) -{ - pixman_fixed_48_16_t tx1, ty1, tx2, ty2; - pixman_fixed_t x1, y1, x2, y2; - int i; - - x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2; - y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2; - x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2; - y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2; - - if (!transform) - { - transformed->x1 = x1; - transformed->y1 = y1; - transformed->x2 = x2; - transformed->y2 = y2; - - return TRUE; - } - - tx1 = ty1 = INT64_MAX; - tx2 = ty2 = INT64_MIN; - - for (i = 0; i < 4; ++i) - { - pixman_fixed_48_16_t tx, ty; - pixman_vector_t v; - - v.vector[0] = (i & 0x01)? x1 : x2; - v.vector[1] = (i & 0x02)? y1 : y2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point (transform, &v)) - return FALSE; - - tx = (pixman_fixed_48_16_t)v.vector[0]; - ty = (pixman_fixed_48_16_t)v.vector[1]; - - if (tx < tx1) - tx1 = tx; - if (ty < ty1) - ty1 = ty; - if (tx > tx2) - tx2 = tx; - if (ty > ty2) - ty2 = ty; - } - - transformed->x1 = tx1; - transformed->y1 = ty1; - transformed->x2 = tx2; - transformed->y2 = ty2; - - return TRUE; -} - -#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX)) -#define ABS(f) (((f) < 0)? (-(f)) : (f)) -#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16))) - -static pixman_bool_t -analyze_extent (pixman_image_t *image, - const pixman_box32_t *extents, - uint32_t *flags) -{ - pixman_transform_t *transform; - pixman_fixed_t x_off, y_off; - pixman_fixed_t width, height; - pixman_fixed_t *params; - box_48_16_t transformed; - pixman_box32_t exp_extents; - - if (!image) - return TRUE; - - /* Some compositing functions walk one step - * outside the destination rectangle, so we - * check here that the expanded-by-one source - * extents in destination space fits in 16 bits - */ - if (!IS_16BIT (extents->x1 - 1) || - !IS_16BIT (extents->y1 - 1) || - !IS_16BIT (extents->x2 + 1) || - !IS_16BIT (extents->y2 + 1)) - { - return FALSE; - } - - transform = image->common.transform; - if (image->common.type == BITS) - { - /* During repeat mode calculations we might convert the - * width/height of an image to fixed 16.16, so we need - * them to be smaller than 16 bits. - */ - if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff) - return FALSE; - - if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM && - extents->x1 >= 0 && - extents->y1 >= 0 && - extents->x2 <= image->bits.width && - extents->y2 <= image->bits.height) - { - *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - return TRUE; - } - - switch (image->common.filter) - { - case PIXMAN_FILTER_CONVOLUTION: - params = image->common.filter_params; - x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); - y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); - width = params[0]; - height = params[1]; - break; - - case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: - params = image->common.filter_params; - x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); - y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); - width = params[0]; - height = params[1]; - break; - - case PIXMAN_FILTER_GOOD: - case PIXMAN_FILTER_BEST: - case PIXMAN_FILTER_BILINEAR: - x_off = - pixman_fixed_1 / 2; - y_off = - pixman_fixed_1 / 2; - width = pixman_fixed_1; - height = pixman_fixed_1; - break; - - case PIXMAN_FILTER_FAST: - case PIXMAN_FILTER_NEAREST: - x_off = - pixman_fixed_e; - y_off = - pixman_fixed_e; - width = 0; - height = 0; - break; - - default: - return FALSE; - } - } - else - { - x_off = 0; - y_off = 0; - width = 0; - height = 0; - } - - if (!compute_transformed_extents (transform, extents, &transformed)) - return FALSE; - - if (image->common.type == BITS) - { - if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0 && - pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0 && - pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width && - pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height) - { - *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; - } - - if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 && - pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 && - pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width && - pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height) - { - *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; - } - } - - /* Check we don't overflow when the destination extents are expanded by one. - * This ensures that compositing functions can simply walk the source space - * using 16.16 variables without worrying about overflow. - */ - exp_extents = *extents; - exp_extents.x1 -= 1; - exp_extents.y1 -= 1; - exp_extents.x2 += 1; - exp_extents.y2 += 1; - - if (!compute_transformed_extents (transform, &exp_extents, &transformed)) - return FALSE; - - if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e) || - !IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) || - !IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) || - !IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height)) - { - return FALSE; - } - - return TRUE; -} - -/* - * Work around GCC bug causing crashes in Mozilla with SSE2 - * - * When using -msse, gcc generates movdqa instructions assuming that - * the stack is 16 byte aligned. Unfortunately some applications, such - * as Mozilla and Mono, end up aligning the stack to 4 bytes, which - * causes the movdqa instructions to fail. - * - * The __force_align_arg_pointer__ makes gcc generate a prologue that - * realigns the stack pointer to 16 bytes. - * - * On x86-64 this is not necessary because the standard ABI already - * calls for a 16 byte aligned stack. - * - * See https://bugs.freedesktop.org/show_bug.cgi?id=15693 - */ -#if defined (USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) -__attribute__((__force_align_arg_pointer__)) -#endif -PIXMAN_EXPORT void -pixman_image_composite32 (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - pixman_format_code_t src_format, mask_format, dest_format; - pixman_region32_t region; - pixman_box32_t extents; - pixman_implementation_t *imp; - pixman_composite_func_t func; - pixman_composite_info_t info; - const pixman_box32_t *pbox; - int n; - - _pixman_image_validate (src); - if (mask) - _pixman_image_validate (mask); - _pixman_image_validate (dest); - - src_format = src->common.extended_format_code; - info.src_flags = src->common.flags; - - if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) - { - mask_format = mask->common.extended_format_code; - info.mask_flags = mask->common.flags; - } - else - { - mask_format = PIXMAN_null; - info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; - } - - dest_format = dest->common.extended_format_code; - info.dest_flags = dest->common.flags; - - /* Check for pixbufs */ - if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && - (src->type == BITS && src->bits.bits == mask->bits.bits) && - (src->common.repeat == mask->common.repeat) && - (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && - (src_x == mask_x && src_y == mask_y)) - { - if (src_format == PIXMAN_x8b8g8r8) - src_format = mask_format = PIXMAN_pixbuf; - else if (src_format == PIXMAN_x8r8g8b8) - src_format = mask_format = PIXMAN_rpixbuf; - } - - pixman_region32_init (®ion); - - if (!_pixman_compute_composite_region32 ( - ®ion, src, mask, dest, - src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height)) - { - goto out; - } - - extents = *pixman_region32_extents (®ion); - - extents.x1 -= dest_x - src_x; - extents.y1 -= dest_y - src_y; - extents.x2 -= dest_x - src_x; - extents.y2 -= dest_y - src_y; - - if (!analyze_extent (src, &extents, &info.src_flags)) - goto out; - - extents.x1 -= src_x - mask_x; - extents.y1 -= src_y - mask_y; - extents.x2 -= src_x - mask_x; - extents.y2 -= src_y - mask_y; - - if (!analyze_extent (mask, &extents, &info.mask_flags)) - goto out; - - /* If the clip is within the source samples, and the samples are - * opaque, then the source is effectively opaque. - */ -#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) -#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ - FAST_PATH_BILINEAR_FILTER | \ - FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) - - if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) - { - info.src_flags |= FAST_PATH_IS_OPAQUE; - } - - if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) - { - info.mask_flags |= FAST_PATH_IS_OPAQUE; - } - - /* - * Check if we can replace our operator by a simpler one - * if the src or dest are opaque. The output operator should be - * mathematically equivalent to the source. - */ - info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); - - _pixman_implementation_lookup_composite ( - get_implementation (), info.op, - src_format, info.src_flags, - mask_format, info.mask_flags, - dest_format, info.dest_flags, - &imp, &func); - - info.src_image = src; - info.mask_image = mask; - info.dest_image = dest; - - pbox = pixman_region32_rectangles (®ion, &n); - - while (n--) - { - info.src_x = pbox->x1 + src_x - dest_x; - info.src_y = pbox->y1 + src_y - dest_y; - info.mask_x = pbox->x1 + mask_x - dest_x; - info.mask_y = pbox->y1 + mask_y - dest_y; - info.dest_x = pbox->x1; - info.dest_y = pbox->y1; - info.width = pbox->x2 - pbox->x1; - info.height = pbox->y2 - pbox->y1; - - func (imp, &info); - - pbox++; - } - -out: - pixman_region32_fini (®ion); -} - -PIXMAN_EXPORT void -pixman_image_composite (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height) -{ - pixman_image_composite32 (op, src, mask, dest, src_x, src_y, - mask_x, mask_y, dest_x, dest_y, width, height); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_blt (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height) -{ - return _pixman_implementation_blt (get_implementation(), - src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, - src_x, src_y, - dest_x, dest_y, - width, height); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_fill (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) -{ - return _pixman_implementation_fill ( - get_implementation(), bits, stride, bpp, x, y, width, height, filler); -} - -static uint32_t -color_to_uint32 (const pixman_color_t *color) -{ - return - (color->alpha >> 8 << 24) | - (color->red >> 8 << 16) | - (color->green & 0xff00) | - (color->blue >> 8); -} - -static pixman_bool_t -color_to_pixel (const pixman_color_t *color, - uint32_t * pixel, - pixman_format_code_t format) -{ - uint32_t c = color_to_uint32 (color); - - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA_FLOAT) - { - return FALSE; - } - - if (!(format == PIXMAN_a8r8g8b8 || - format == PIXMAN_x8r8g8b8 || - format == PIXMAN_a8b8g8r8 || - format == PIXMAN_x8b8g8r8 || - format == PIXMAN_b8g8r8a8 || - format == PIXMAN_b8g8r8x8 || - format == PIXMAN_r8g8b8a8 || - format == PIXMAN_r8g8b8x8 || - format == PIXMAN_r5g6b5 || - format == PIXMAN_b5g6r5 || - format == PIXMAN_a8 || - format == PIXMAN_a1)) - { - return FALSE; - } - - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR) - { - c = ((c & 0xff000000) >> 0) | - ((c & 0x00ff0000) >> 16) | - ((c & 0x0000ff00) >> 0) | - ((c & 0x000000ff) << 16); - } - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA) - { - c = ((c & 0xff000000) >> 24) | - ((c & 0x00ff0000) >> 8) | - ((c & 0x0000ff00) << 8) | - ((c & 0x000000ff) << 24); - } - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA) - c = ((c & 0xff000000) >> 24) | (c << 8); - - if (format == PIXMAN_a1) - c = c >> 31; - else if (format == PIXMAN_a8) - c = c >> 24; - else if (format == PIXMAN_r5g6b5 || - format == PIXMAN_b5g6r5) - c = convert_8888_to_0565 (c); - -#if 0 - printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue); - printf ("pixel: %x\n", c); -#endif - - *pixel = c; - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_fill_rectangles (pixman_op_t op, - pixman_image_t * dest, - const pixman_color_t * color, - int n_rects, - const pixman_rectangle16_t *rects) -{ - pixman_box32_t stack_boxes[6]; - pixman_box32_t *boxes; - pixman_bool_t result; - int i; - - if (n_rects > 6) - { - boxes = pixman_malloc_ab (sizeof (pixman_box32_t), n_rects); - if (boxes == NULL) - return FALSE; - } - else - { - boxes = stack_boxes; - } - - for (i = 0; i < n_rects; ++i) - { - boxes[i].x1 = rects[i].x; - boxes[i].y1 = rects[i].y; - boxes[i].x2 = boxes[i].x1 + rects[i].width; - boxes[i].y2 = boxes[i].y1 + rects[i].height; - } - - result = pixman_image_fill_boxes (op, dest, color, n_rects, boxes); - - if (boxes != stack_boxes) - free (boxes); - - return result; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_image_fill_boxes (pixman_op_t op, - pixman_image_t * dest, - const pixman_color_t *color, - int n_boxes, - const pixman_box32_t *boxes) -{ - pixman_image_t *solid; - pixman_color_t c; - int i; - - _pixman_image_validate (dest); - - if (color->alpha == 0xffff) - { - if (op == PIXMAN_OP_OVER) - op = PIXMAN_OP_SRC; - } - - if (op == PIXMAN_OP_CLEAR) - { - c.red = 0; - c.green = 0; - c.blue = 0; - c.alpha = 0; - - color = &c; - - op = PIXMAN_OP_SRC; - } - - if (op == PIXMAN_OP_SRC) - { - uint32_t pixel; - - if (color_to_pixel (color, &pixel, dest->bits.format)) - { - pixman_region32_t fill_region; - int n_rects, j; - pixman_box32_t *rects; - - if (!pixman_region32_init_rects (&fill_region, boxes, n_boxes)) - return FALSE; - - if (dest->common.have_clip_region) - { - if (!pixman_region32_intersect (&fill_region, - &fill_region, - &dest->common.clip_region)) - return FALSE; - } - - rects = pixman_region32_rectangles (&fill_region, &n_rects); - for (j = 0; j < n_rects; ++j) - { - const pixman_box32_t *rect = &(rects[j]); - pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format), - rect->x1, rect->y1, rect->x2 - rect->x1, rect->y2 - rect->y1, - pixel); - } - - pixman_region32_fini (&fill_region); - return TRUE; - } - } - - solid = pixman_image_create_solid_fill (color); - if (!solid) - return FALSE; - - for (i = 0; i < n_boxes; ++i) - { - const pixman_box32_t *box = &(boxes[i]); - - pixman_image_composite32 (op, solid, NULL, dest, - 0, 0, 0, 0, - box->x1, box->y1, - box->x2 - box->x1, box->y2 - box->y1); - } - - pixman_image_unref (solid); - - return TRUE; -} - -/** - * pixman_version: - * - * Returns the version of the pixman library encoded in a single - * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that - * later versions compare greater than earlier versions. - * - * A run-time comparison to check that pixman's version is greater than - * or equal to version X.Y.Z could be performed as follows: - * - * - * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...} - * - * - * See also pixman_version_string() as well as the compile-time - * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING. - * - * Return value: the encoded version. - **/ -PIXMAN_EXPORT int -pixman_version (void) -{ - return PIXMAN_VERSION; -} - -/** - * pixman_version_string: - * - * Returns the version of the pixman library as a human-readable string - * of the form "X.Y.Z". - * - * See also pixman_version() as well as the compile-time equivalents - * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION. - * - * Return value: a string containing the version. - **/ -PIXMAN_EXPORT const char* -pixman_version_string (void) -{ - return PIXMAN_VERSION_STRING; -} - -/** - * pixman_format_supported_source: - * @format: A pixman_format_code_t format - * - * Return value: whether the provided format code is a supported - * format for a pixman surface used as a source in - * rendering. - * - * Currently, all pixman_format_code_t values are supported. - **/ -PIXMAN_EXPORT pixman_bool_t -pixman_format_supported_source (pixman_format_code_t format) -{ - switch (format) - { - /* 32 bpp formats */ - case PIXMAN_a2b10g10r10: - case PIXMAN_x2b10g10r10: - case PIXMAN_a2r10g10b10: - case PIXMAN_x2r10g10b10: - case PIXMAN_a8r8g8b8: - case PIXMAN_a8r8g8b8_sRGB: - case PIXMAN_r8g8b8_sRGB: - case PIXMAN_x8r8g8b8: - case PIXMAN_a8b8g8r8: - case PIXMAN_x8b8g8r8: - case PIXMAN_b8g8r8a8: - case PIXMAN_b8g8r8x8: - case PIXMAN_r8g8b8a8: - case PIXMAN_r8g8b8x8: - case PIXMAN_r8g8b8: - case PIXMAN_b8g8r8: - case PIXMAN_r5g6b5: - case PIXMAN_b5g6r5: - case PIXMAN_x14r6g6b6: - /* 16 bpp formats */ - case PIXMAN_a1r5g5b5: - case PIXMAN_x1r5g5b5: - case PIXMAN_a1b5g5r5: - case PIXMAN_x1b5g5r5: - case PIXMAN_a4r4g4b4: - case PIXMAN_x4r4g4b4: - case PIXMAN_a4b4g4r4: - case PIXMAN_x4b4g4r4: - /* 8bpp formats */ - case PIXMAN_a8: - case PIXMAN_r3g3b2: - case PIXMAN_b2g3r3: - case PIXMAN_a2r2g2b2: - case PIXMAN_a2b2g2r2: - case PIXMAN_c8: - case PIXMAN_g8: - case PIXMAN_x4a4: - /* Collides with PIXMAN_c8 - case PIXMAN_x4c4: - */ - /* Collides with PIXMAN_g8 - case PIXMAN_x4g4: - */ - /* 4bpp formats */ - case PIXMAN_a4: - case PIXMAN_r1g2b1: - case PIXMAN_b1g2r1: - case PIXMAN_a1r1g1b1: - case PIXMAN_a1b1g1r1: - case PIXMAN_c4: - case PIXMAN_g4: - /* 1bpp formats */ - case PIXMAN_a1: - case PIXMAN_g1: - /* YUV formats */ - case PIXMAN_yuy2: - case PIXMAN_yv12: - return TRUE; - - default: - return FALSE; - } -} - -/** - * pixman_format_supported_destination: - * @format: A pixman_format_code_t format - * - * Return value: whether the provided format code is a supported - * format for a pixman surface used as a destination in - * rendering. - * - * Currently, all pixman_format_code_t values are supported - * except for the YUV formats. - **/ -PIXMAN_EXPORT pixman_bool_t -pixman_format_supported_destination (pixman_format_code_t format) -{ - /* YUV formats cannot be written to at the moment */ - if (format == PIXMAN_yuy2 || format == PIXMAN_yv12) - return FALSE; - - return pixman_format_supported_source (format); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_compute_composite_region (pixman_region16_t * region, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height) -{ - pixman_region32_t r32; - pixman_bool_t retval; - - pixman_region32_init (&r32); - - retval = _pixman_compute_composite_region32 ( - &r32, src_image, mask_image, dest_image, - src_x, src_y, mask_x, mask_y, dest_x, dest_y, - width, height); - - if (retval) - { - if (!pixman_region16_copy_from_region32 (region, &r32)) - retval = FALSE; - } - - pixman_region32_fini (&r32); - return retval; -} diff --git a/vendor/pixman/pixman/pixman.h b/vendor/pixman/pixman/pixman.h deleted file mode 100644 index d697b5357..000000000 --- a/vendor/pixman/pixman/pixman.h +++ /dev/null @@ -1,1426 +0,0 @@ -/*********************************************************** - -Copyright 1987, 1998 The Open Group - -Permission to use, copy, modify, distribute, and sell this software and its -documentation for any purpose is hereby granted without fee, provided that -the above copyright notice appear in all copies and that both that -copyright notice and this permission notice appear in supporting -documentation. - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Except as contained in this notice, the name of The Open Group shall not be -used in advertising or otherwise to promote the sale, use or other dealings -in this Software without prior written authorization from The Open Group. - -Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts. - - All Rights Reserved - -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Digital not be -used in advertising or publicity pertaining to distribution of the -software without specific, written prior permission. - -DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL -DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS -SOFTWARE. - -******************************************************************/ -/* - * Copyright © 1998, 2004 Keith Packard - * Copyright 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef PIXMAN_H__ -#define PIXMAN_H__ - -#include - -#ifdef __cplusplus -#define PIXMAN_BEGIN_DECLS extern "C" { -#define PIXMAN_END_DECLS } -#else -#define PIXMAN_BEGIN_DECLS -#define PIXMAN_END_DECLS -#endif - -PIXMAN_BEGIN_DECLS - -/* - * Standard integers - */ - -#if !defined (PIXMAN_DONT_DEFINE_STDINT) - -#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc) -# include -/* VS 2010 (_MSC_VER 1600) has stdint.h */ -#elif defined (_MSC_VER) && _MSC_VER < 1600 -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#elif defined (_AIX) -# include -#else -# include -#endif - -#endif - -/* - * Boolean - */ -typedef int pixman_bool_t; - -/* - * Fixpoint numbers - */ -typedef int64_t pixman_fixed_32_32_t; -typedef pixman_fixed_32_32_t pixman_fixed_48_16_t; -typedef uint32_t pixman_fixed_1_31_t; -typedef uint32_t pixman_fixed_1_16_t; -typedef int32_t pixman_fixed_16_16_t; -typedef pixman_fixed_16_16_t pixman_fixed_t; - -#define pixman_fixed_e ((pixman_fixed_t) 1) -#define pixman_fixed_1 (pixman_int_to_fixed(1)) -#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e) -#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1)) -#define pixman_fixed_to_int(f) ((int) ((f) >> 16)) -#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16)) -#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1) -#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0)) -#define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e) -#define pixman_fixed_floor(f) ((f) & ~pixman_fixed_1_minus_e) -#define pixman_fixed_ceil(f) pixman_fixed_floor ((f) + pixman_fixed_1_minus_e) -#define pixman_fixed_fraction(f) ((f) & pixman_fixed_1_minus_e) -#define pixman_fixed_mod_2(f) ((f) & (pixman_fixed1 | pixman_fixed_1_minus_e)) -#define pixman_max_fixed_48_16 ((pixman_fixed_48_16_t) 0x7fffffff) -#define pixman_min_fixed_48_16 (-((pixman_fixed_48_16_t) 1 << 31)) - -/* - * Misc structs - */ -typedef struct pixman_color pixman_color_t; -typedef struct pixman_point_fixed pixman_point_fixed_t; -typedef struct pixman_line_fixed pixman_line_fixed_t; -typedef struct pixman_vector pixman_vector_t; -typedef struct pixman_transform pixman_transform_t; - -struct pixman_color -{ - uint16_t red; - uint16_t green; - uint16_t blue; - uint16_t alpha; -}; - -struct pixman_point_fixed -{ - pixman_fixed_t x; - pixman_fixed_t y; -}; - -struct pixman_line_fixed -{ - pixman_point_fixed_t p1, p2; -}; - -/* - * Fixed point matrices - */ - -struct pixman_vector -{ - pixman_fixed_t vector[3]; -}; - -struct pixman_transform -{ - pixman_fixed_t matrix[3][3]; -}; - -/* forward declaration (sorry) */ -struct pixman_box16; -typedef union pixman_image pixman_image_t; - -PIXMAN_API -void pixman_transform_init_identity (struct pixman_transform *matrix); - -PIXMAN_API -pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, - struct pixman_vector *vector); - -PIXMAN_API -pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, - struct pixman_vector *vector); - -PIXMAN_API -pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst, - const struct pixman_transform *l, - const struct pixman_transform *r); - -PIXMAN_API -void pixman_transform_init_scale (struct pixman_transform *t, - pixman_fixed_t sx, - pixman_fixed_t sy); - -PIXMAN_API -pixman_bool_t pixman_transform_scale (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t sx, - pixman_fixed_t sy); - -PIXMAN_API -void pixman_transform_init_rotate (struct pixman_transform *t, - pixman_fixed_t cos, - pixman_fixed_t sin); - -PIXMAN_API -pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t c, - pixman_fixed_t s); - -PIXMAN_API -void pixman_transform_init_translate (struct pixman_transform *t, - pixman_fixed_t tx, - pixman_fixed_t ty); - -PIXMAN_API -pixman_bool_t pixman_transform_translate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t tx, - pixman_fixed_t ty); - -PIXMAN_API -pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix, - struct pixman_box16 *b); - -PIXMAN_API -pixman_bool_t pixman_transform_invert (struct pixman_transform *dst, - const struct pixman_transform *src); - -PIXMAN_API -pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t); - -PIXMAN_API -pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t); - -PIXMAN_API -pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t); - -PIXMAN_API -pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a, - const struct pixman_transform *b); - -/* - * Floating point matrices - */ -typedef struct pixman_f_transform pixman_f_transform_t; -typedef struct pixman_f_vector pixman_f_vector_t; - -struct pixman_f_vector -{ - double v[3]; -}; - -struct pixman_f_transform -{ - double m[3][3]; -}; - - -PIXMAN_API -pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t, - const struct pixman_f_transform *ft); - -PIXMAN_API -void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, - const struct pixman_transform *t); - -PIXMAN_API -pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst, - const struct pixman_f_transform *src); - -PIXMAN_API -pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t, - struct pixman_f_vector *v); - -PIXMAN_API -void pixman_f_transform_point_3d (const struct pixman_f_transform *t, - struct pixman_f_vector *v); - -PIXMAN_API -void pixman_f_transform_multiply (struct pixman_f_transform *dst, - const struct pixman_f_transform *l, - const struct pixman_f_transform *r); - -PIXMAN_API -void pixman_f_transform_init_scale (struct pixman_f_transform *t, - double sx, - double sy); - -PIXMAN_API -pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double sx, - double sy); - -PIXMAN_API -void pixman_f_transform_init_rotate (struct pixman_f_transform *t, - double cos, - double sin); - -PIXMAN_API -pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double c, - double s); - -PIXMAN_API -void pixman_f_transform_init_translate (struct pixman_f_transform *t, - double tx, - double ty); - -PIXMAN_API -pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double tx, - double ty); - -PIXMAN_API -pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t, - struct pixman_box16 *b); - -PIXMAN_API -void pixman_f_transform_init_identity (struct pixman_f_transform *t); - -typedef enum -{ - PIXMAN_REPEAT_NONE, - PIXMAN_REPEAT_NORMAL, - PIXMAN_REPEAT_PAD, - PIXMAN_REPEAT_REFLECT -} pixman_repeat_t; - -typedef enum -{ - PIXMAN_DITHER_NONE, - PIXMAN_DITHER_FAST, - PIXMAN_DITHER_GOOD, - PIXMAN_DITHER_BEST, - PIXMAN_DITHER_ORDERED_BAYER_8, - PIXMAN_DITHER_ORDERED_BLUE_NOISE_64, -} pixman_dither_t; - -typedef enum -{ - PIXMAN_FILTER_FAST, - PIXMAN_FILTER_GOOD, - PIXMAN_FILTER_BEST, - PIXMAN_FILTER_NEAREST, - PIXMAN_FILTER_BILINEAR, - PIXMAN_FILTER_CONVOLUTION, - - /* The SEPARABLE_CONVOLUTION filter takes the following parameters: - * - * width: integer given as 16.16 fixpoint number - * height: integer given as 16.16 fixpoint number - * x_phase_bits: integer given as 16.16 fixpoint - * y_phase_bits: integer given as 16.16 fixpoint - * xtables: (1 << x_phase_bits) tables of size width - * ytables: (1 << y_phase_bits) tables of size height - * - * When sampling at (x, y), the location is first rounded to one of - * n_x_phases * n_y_phases subpixel positions. These subpixel positions - * determine an xtable and a ytable to use. - * - * Conceptually a width x height matrix is then formed in which each entry - * is the product of the corresponding entries in the x and y tables. - * This matrix is then aligned with the image pixels such that its center - * is as close as possible to the subpixel location chosen earlier. Then - * the image is convolved with the matrix and the resulting pixel returned. - */ - PIXMAN_FILTER_SEPARABLE_CONVOLUTION -} pixman_filter_t; - -typedef enum -{ - PIXMAN_OP_CLEAR = 0x00, - PIXMAN_OP_SRC = 0x01, - PIXMAN_OP_DST = 0x02, - PIXMAN_OP_OVER = 0x03, - PIXMAN_OP_OVER_REVERSE = 0x04, - PIXMAN_OP_IN = 0x05, - PIXMAN_OP_IN_REVERSE = 0x06, - PIXMAN_OP_OUT = 0x07, - PIXMAN_OP_OUT_REVERSE = 0x08, - PIXMAN_OP_ATOP = 0x09, - PIXMAN_OP_ATOP_REVERSE = 0x0a, - PIXMAN_OP_XOR = 0x0b, - PIXMAN_OP_ADD = 0x0c, - PIXMAN_OP_SATURATE = 0x0d, - - PIXMAN_OP_DISJOINT_CLEAR = 0x10, - PIXMAN_OP_DISJOINT_SRC = 0x11, - PIXMAN_OP_DISJOINT_DST = 0x12, - PIXMAN_OP_DISJOINT_OVER = 0x13, - PIXMAN_OP_DISJOINT_OVER_REVERSE = 0x14, - PIXMAN_OP_DISJOINT_IN = 0x15, - PIXMAN_OP_DISJOINT_IN_REVERSE = 0x16, - PIXMAN_OP_DISJOINT_OUT = 0x17, - PIXMAN_OP_DISJOINT_OUT_REVERSE = 0x18, - PIXMAN_OP_DISJOINT_ATOP = 0x19, - PIXMAN_OP_DISJOINT_ATOP_REVERSE = 0x1a, - PIXMAN_OP_DISJOINT_XOR = 0x1b, - - PIXMAN_OP_CONJOINT_CLEAR = 0x20, - PIXMAN_OP_CONJOINT_SRC = 0x21, - PIXMAN_OP_CONJOINT_DST = 0x22, - PIXMAN_OP_CONJOINT_OVER = 0x23, - PIXMAN_OP_CONJOINT_OVER_REVERSE = 0x24, - PIXMAN_OP_CONJOINT_IN = 0x25, - PIXMAN_OP_CONJOINT_IN_REVERSE = 0x26, - PIXMAN_OP_CONJOINT_OUT = 0x27, - PIXMAN_OP_CONJOINT_OUT_REVERSE = 0x28, - PIXMAN_OP_CONJOINT_ATOP = 0x29, - PIXMAN_OP_CONJOINT_ATOP_REVERSE = 0x2a, - PIXMAN_OP_CONJOINT_XOR = 0x2b, - - PIXMAN_OP_MULTIPLY = 0x30, - PIXMAN_OP_SCREEN = 0x31, - PIXMAN_OP_OVERLAY = 0x32, - PIXMAN_OP_DARKEN = 0x33, - PIXMAN_OP_LIGHTEN = 0x34, - PIXMAN_OP_COLOR_DODGE = 0x35, - PIXMAN_OP_COLOR_BURN = 0x36, - PIXMAN_OP_HARD_LIGHT = 0x37, - PIXMAN_OP_SOFT_LIGHT = 0x38, - PIXMAN_OP_DIFFERENCE = 0x39, - PIXMAN_OP_EXCLUSION = 0x3a, - PIXMAN_OP_HSL_HUE = 0x3b, - PIXMAN_OP_HSL_SATURATION = 0x3c, - PIXMAN_OP_HSL_COLOR = 0x3d, - PIXMAN_OP_HSL_LUMINOSITY = 0x3e - -#ifdef PIXMAN_USE_INTERNAL_API - , - PIXMAN_N_OPERATORS, - PIXMAN_OP_NONE = PIXMAN_N_OPERATORS -#endif -} pixman_op_t; - -/* - * Regions - */ -typedef struct pixman_region16_data pixman_region16_data_t; -typedef struct pixman_box16 pixman_box16_t; -typedef struct pixman_rectangle16 pixman_rectangle16_t; -typedef struct pixman_region16 pixman_region16_t; - -struct pixman_region16_data { - long size; - long numRects; -/* pixman_box16_t rects[size]; in memory but not explicitly declared */ -}; - -struct pixman_rectangle16 -{ - int16_t x, y; - uint16_t width, height; -}; - -struct pixman_box16 -{ - int16_t x1, y1, x2, y2; -}; - -struct pixman_region16 -{ - pixman_box16_t extents; - pixman_region16_data_t *data; -}; - -typedef enum -{ - PIXMAN_REGION_OUT, - PIXMAN_REGION_IN, - PIXMAN_REGION_PART -} pixman_region_overlap_t; - -/* This function exists only to make it possible to preserve - * the X ABI - it should go away at first opportunity. - */ -PIXMAN_API -void pixman_region_set_static_pointers (pixman_box16_t *empty_box, - pixman_region16_data_t *empty_data, - pixman_region16_data_t *broken_data); - -/* creation/destruction */ -PIXMAN_API -void pixman_region_init (pixman_region16_t *region); - -PIXMAN_API -void pixman_region_init_rect (pixman_region16_t *region, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region_init_rects (pixman_region16_t *region, - const pixman_box16_t *boxes, - int count); - -PIXMAN_API -void pixman_region_init_with_extents (pixman_region16_t *region, - const pixman_box16_t *extents); - -PIXMAN_API -void pixman_region_init_from_image (pixman_region16_t *region, - pixman_image_t *image); - -PIXMAN_API -void pixman_region_fini (pixman_region16_t *region); - - -/* manipulation */ -PIXMAN_API -void pixman_region_translate (pixman_region16_t *region, - int x, - int y); - -PIXMAN_API -pixman_bool_t pixman_region_copy (pixman_region16_t *dest, - const pixman_region16_t *source); - -PIXMAN_API -pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg, - const pixman_region16_t *reg1, - const pixman_region16_t *reg2); - -PIXMAN_API -pixman_bool_t pixman_region_union (pixman_region16_t *new_reg, - const pixman_region16_t *reg1, - const pixman_region16_t *reg2); - -PIXMAN_API -pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest, - const pixman_region16_t *source, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest, - const pixman_region16_t *source, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d, - const pixman_region16_t *reg_m, - const pixman_region16_t *reg_s); - -PIXMAN_API -pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg, - const pixman_region16_t *reg1, - const pixman_box16_t *inv_rect); - -PIXMAN_API -pixman_bool_t pixman_region_contains_point (const pixman_region16_t *region, - int x, - int y, - pixman_box16_t *box); - -PIXMAN_API -pixman_region_overlap_t pixman_region_contains_rectangle (const pixman_region16_t *region, - const pixman_box16_t *prect); - -PIXMAN_API -pixman_bool_t pixman_region_empty (const pixman_region16_t *region); - -PIXMAN_API -pixman_bool_t pixman_region_not_empty (const pixman_region16_t *region); - -PIXMAN_API -pixman_box16_t * pixman_region_extents (const pixman_region16_t *region); - -PIXMAN_API -int pixman_region_n_rects (const pixman_region16_t *region); - -PIXMAN_API -pixman_box16_t * pixman_region_rectangles (const pixman_region16_t *region, - int *n_rects); - -PIXMAN_API -pixman_bool_t pixman_region_equal (const pixman_region16_t *region1, - const pixman_region16_t *region2); - -PIXMAN_API -pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region); - -PIXMAN_API -void pixman_region_reset (pixman_region16_t *region, - const pixman_box16_t *box); - -PIXMAN_API -void pixman_region_clear (pixman_region16_t *region); -/* - * 32 bit regions - */ -typedef struct pixman_region32_data pixman_region32_data_t; -typedef struct pixman_box32 pixman_box32_t; -typedef struct pixman_rectangle32 pixman_rectangle32_t; -typedef struct pixman_region32 pixman_region32_t; - -struct pixman_region32_data { - long size; - long numRects; -/* pixman_box32_t rects[size]; in memory but not explicitly declared */ -}; - -struct pixman_rectangle32 -{ - int32_t x, y; - uint32_t width, height; -}; - -struct pixman_box32 -{ - int32_t x1, y1, x2, y2; -}; - -struct pixman_region32 -{ - pixman_box32_t extents; - pixman_region32_data_t *data; -}; - -/* creation/destruction */ -PIXMAN_API -void pixman_region32_init (pixman_region32_t *region); - -PIXMAN_API -void pixman_region32_init_rect (pixman_region32_t *region, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region, - const pixman_box32_t *boxes, - int count); - -PIXMAN_API -void pixman_region32_init_with_extents (pixman_region32_t *region, - const pixman_box32_t *extents); - -PIXMAN_API -void pixman_region32_init_from_image (pixman_region32_t *region, - pixman_image_t *image); - -PIXMAN_API -void pixman_region32_fini (pixman_region32_t *region); - - -/* manipulation */ -PIXMAN_API -void pixman_region32_translate (pixman_region32_t *region, - int x, - int y); - -PIXMAN_API -pixman_bool_t pixman_region32_copy (pixman_region32_t *dest, - const pixman_region32_t *source); - -PIXMAN_API -pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg, - const pixman_region32_t *reg1, - const pixman_region32_t *reg2); - -PIXMAN_API -pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg, - const pixman_region32_t *reg1, - const pixman_region32_t *reg2); - -PIXMAN_API -pixman_bool_t pixman_region32_intersect_rect (pixman_region32_t *dest, - const pixman_region32_t *source, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest, - const pixman_region32_t *source, - int x, - int y, - unsigned int width, - unsigned int height); - -PIXMAN_API -pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d, - const pixman_region32_t *reg_m, - const pixman_region32_t *reg_s); - -PIXMAN_API -pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg, - const pixman_region32_t *reg1, - const pixman_box32_t *inv_rect); - -PIXMAN_API -pixman_bool_t pixman_region32_contains_point (const pixman_region32_t *region, - int x, - int y, - pixman_box32_t *box); - -PIXMAN_API -pixman_region_overlap_t pixman_region32_contains_rectangle (const pixman_region32_t *region, - const pixman_box32_t *prect); - -PIXMAN_API -pixman_bool_t pixman_region32_empty (const pixman_region32_t *region); - -PIXMAN_API -pixman_bool_t pixman_region32_not_empty (const pixman_region32_t *region); - -PIXMAN_API -pixman_box32_t * pixman_region32_extents (const pixman_region32_t *region); - -PIXMAN_API -int pixman_region32_n_rects (const pixman_region32_t *region); - -PIXMAN_API -pixman_box32_t * pixman_region32_rectangles (const pixman_region32_t *region, - int *n_rects); - -PIXMAN_API -pixman_bool_t pixman_region32_equal (const pixman_region32_t *region1, - const pixman_region32_t *region2); - -PIXMAN_API -pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region); - -PIXMAN_API -void pixman_region32_reset (pixman_region32_t *region, - const pixman_box32_t *box); - -PIXMAN_API -void pixman_region32_clear (pixman_region32_t *region); - - -/* Copy / Fill / Misc */ -PIXMAN_API -pixman_bool_t pixman_blt (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dest_x, - int dest_y, - int width, - int height); - -PIXMAN_API -pixman_bool_t pixman_fill (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor); - - -PIXMAN_API -int pixman_version (void); - -PIXMAN_API -const char* pixman_version_string (void); - -/* - * Images - */ -typedef struct pixman_indexed pixman_indexed_t; -typedef struct pixman_gradient_stop pixman_gradient_stop_t; - -typedef uint32_t (* pixman_read_memory_func_t) (const void *src, int size); -typedef void (* pixman_write_memory_func_t) (void *dst, uint32_t value, int size); - -typedef void (* pixman_image_destroy_func_t) (pixman_image_t *image, void *data); - -struct pixman_gradient_stop { - pixman_fixed_t x; - pixman_color_t color; -}; - -#define PIXMAN_MAX_INDEXED 256 /* XXX depth must be <= 8 */ - -#if PIXMAN_MAX_INDEXED <= 256 -typedef uint8_t pixman_index_type; -#endif - -struct pixman_indexed -{ - pixman_bool_t color; - uint32_t rgba[PIXMAN_MAX_INDEXED]; - pixman_index_type ent[32768]; -}; - -/* - * While the protocol is generous in format support, the - * sample implementation allows only packed RGB and GBR - * representations for data to simplify software rendering, - */ -#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \ - ((type) << 16) | \ - ((a) << 12) | \ - ((r) << 8) | \ - ((g) << 4) | \ - ((b))) - -#define PIXMAN_FORMAT_BYTE(bpp,type,a,r,g,b) \ - (((bpp >> 3) << 24) | \ - (3 << 22) | ((type) << 16) | \ - ((a >> 3) << 12) | \ - ((r >> 3) << 8) | \ - ((g >> 3) << 4) | \ - ((b >> 3))) - -#define PIXMAN_FORMAT_RESHIFT(val, ofs, num) \ - (((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3)) - -#define PIXMAN_FORMAT_BPP(f) PIXMAN_FORMAT_RESHIFT(f, 24, 8) -#define PIXMAN_FORMAT_SHIFT(f) ((uint32_t)((f >> 22) & 3)) -#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0x3f) -#define PIXMAN_FORMAT_A(f) PIXMAN_FORMAT_RESHIFT(f, 12, 4) -#define PIXMAN_FORMAT_R(f) PIXMAN_FORMAT_RESHIFT(f, 8, 4) -#define PIXMAN_FORMAT_G(f) PIXMAN_FORMAT_RESHIFT(f, 4, 4) -#define PIXMAN_FORMAT_B(f) PIXMAN_FORMAT_RESHIFT(f, 0, 4) -#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff) -#define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff) -#define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \ - PIXMAN_FORMAT_R(f) + \ - PIXMAN_FORMAT_G(f) + \ - PIXMAN_FORMAT_B(f)) - -#define PIXMAN_TYPE_OTHER 0 -#define PIXMAN_TYPE_A 1 -#define PIXMAN_TYPE_ARGB 2 -#define PIXMAN_TYPE_ABGR 3 -#define PIXMAN_TYPE_COLOR 4 -#define PIXMAN_TYPE_GRAY 5 -#define PIXMAN_TYPE_YUY2 6 -#define PIXMAN_TYPE_YV12 7 -#define PIXMAN_TYPE_BGRA 8 -#define PIXMAN_TYPE_RGBA 9 -#define PIXMAN_TYPE_ARGB_SRGB 10 -#define PIXMAN_TYPE_RGBA_FLOAT 11 - -#define PIXMAN_FORMAT_COLOR(f) \ - (PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \ - PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \ - PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \ - PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA || \ - PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA_FLOAT) - -typedef enum { -/* 128bpp formats */ - PIXMAN_rgba_float = PIXMAN_FORMAT_BYTE(128,PIXMAN_TYPE_RGBA_FLOAT,32,32,32,32), -/* 96bpp formats */ - PIXMAN_rgb_float = PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32), - -/* 32bpp formats */ - PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), - PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), - PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), - PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8), - PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8), - PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8), - PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8), - PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8), - PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6), - PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10), - PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10), - PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), - PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10), - -/* sRGB formats */ - PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8), - PIXMAN_r8g8b8_sRGB = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB_SRGB,0,8,8,8), - -/* 24bpp formats */ - PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), - PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), - -/* 16bpp formats */ - PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), - PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), - - PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), - PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), - PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), - PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), - PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), - PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), - PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), - PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), - -/* 8bpp formats */ - PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), - PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), - PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), - PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), - PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), - - PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), - - PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), - - PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), - -/* 4bpp formats */ - PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), - PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), - PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), - PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), - PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), - - PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), - -/* 1bpp formats */ - PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), - - PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), - -/* YUV formats */ - PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), - PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) -} pixman_format_code_t; - -/* Querying supported format values. */ -PIXMAN_API -pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format); - -PIXMAN_API -pixman_bool_t pixman_format_supported_source (pixman_format_code_t format); - -/* Constructors */ -PIXMAN_API -pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color); - -PIXMAN_API -pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1, - const pixman_point_fixed_t *p2, - const pixman_gradient_stop_t *stops, - int n_stops); - -PIXMAN_API -pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner, - const pixman_point_fixed_t *outer, - pixman_fixed_t inner_radius, - pixman_fixed_t outer_radius, - const pixman_gradient_stop_t *stops, - int n_stops); - -PIXMAN_API -pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center, - pixman_fixed_t angle, - const pixman_gradient_stop_t *stops, - int n_stops); - -PIXMAN_API -pixman_image_t *pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t *bits, - int rowstride_bytes); - -PIXMAN_API -pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes); - -/* Destructor */ -PIXMAN_API -pixman_image_t *pixman_image_ref (pixman_image_t *image); - -PIXMAN_API -pixman_bool_t pixman_image_unref (pixman_image_t *image); - - -PIXMAN_API -void pixman_image_set_destroy_function (pixman_image_t *image, - pixman_image_destroy_func_t function, - void *data); - -PIXMAN_API -void * pixman_image_get_destroy_data (pixman_image_t *image); - -/* Set properties */ -PIXMAN_API -pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image, - const pixman_region16_t *region); - -PIXMAN_API -pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image, - const pixman_region32_t *region); - -PIXMAN_API -void pixman_image_set_has_client_clip (pixman_image_t *image, - pixman_bool_t clien_clip); - -PIXMAN_API -pixman_bool_t pixman_image_set_transform (pixman_image_t *image, - const pixman_transform_t *transform); - -PIXMAN_API -void pixman_image_set_repeat (pixman_image_t *image, - pixman_repeat_t repeat); - -PIXMAN_API -void pixman_image_set_dither (pixman_image_t *image, - pixman_dither_t dither); - -PIXMAN_API -void pixman_image_set_dither_offset (pixman_image_t *image, - int offset_x, - int offset_y); - -PIXMAN_API -pixman_bool_t pixman_image_set_filter (pixman_image_t *image, - pixman_filter_t filter, - const pixman_fixed_t *filter_params, - int n_filter_params); - -PIXMAN_API -void pixman_image_set_source_clipping (pixman_image_t *image, - pixman_bool_t source_clipping); - -PIXMAN_API -void pixman_image_set_alpha_map (pixman_image_t *image, - pixman_image_t *alpha_map, - int16_t x, - int16_t y); - -PIXMAN_API -void pixman_image_set_component_alpha (pixman_image_t *image, - pixman_bool_t component_alpha); - -PIXMAN_API -pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image); - -PIXMAN_API -void pixman_image_set_accessors (pixman_image_t *image, - pixman_read_memory_func_t read_func, - pixman_write_memory_func_t write_func); - -PIXMAN_API -void pixman_image_set_indexed (pixman_image_t *image, - const pixman_indexed_t *indexed); - -PIXMAN_API -uint32_t *pixman_image_get_data (pixman_image_t *image); - -PIXMAN_API -int pixman_image_get_width (pixman_image_t *image); - -PIXMAN_API -int pixman_image_get_height (pixman_image_t *image); - -PIXMAN_API -int pixman_image_get_stride (pixman_image_t *image); /* in bytes */ - -PIXMAN_API -int pixman_image_get_depth (pixman_image_t *image); - -PIXMAN_API -pixman_format_code_t pixman_image_get_format (pixman_image_t *image); - -typedef enum -{ - PIXMAN_KERNEL_IMPULSE, - PIXMAN_KERNEL_BOX, - PIXMAN_KERNEL_LINEAR, - PIXMAN_KERNEL_CUBIC, - PIXMAN_KERNEL_GAUSSIAN, - PIXMAN_KERNEL_LANCZOS2, - PIXMAN_KERNEL_LANCZOS3, - PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */ -} pixman_kernel_t; - -/* Create the parameter list for a SEPARABLE_CONVOLUTION filter - * with the given kernels and scale parameters. - */ -PIXMAN_API -pixman_fixed_t * -pixman_filter_create_separable_convolution (int *n_values, - pixman_fixed_t scale_x, - pixman_fixed_t scale_y, - pixman_kernel_t reconstruct_x, - pixman_kernel_t reconstruct_y, - pixman_kernel_t sample_x, - pixman_kernel_t sample_y, - int subsample_bits_x, - int subsample_bits_y); - - -PIXMAN_API -pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op, - pixman_image_t *image, - const pixman_color_t *color, - int n_rects, - const pixman_rectangle16_t *rects); - -PIXMAN_API -pixman_bool_t pixman_image_fill_boxes (pixman_op_t op, - pixman_image_t *dest, - const pixman_color_t *color, - int n_boxes, - const pixman_box32_t *boxes); - -/* Composite */ -PIXMAN_API -pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region, - pixman_image_t *src_image, - pixman_image_t *mask_image, - pixman_image_t *dest_image, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height); - -PIXMAN_API -void pixman_image_composite (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height); - -PIXMAN_API -void pixman_image_composite32 (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); - -/* Executive Summary: This function is a no-op that only exists - * for historical reasons. - * - * There used to be a bug in the X server where it would rely on - * out-of-bounds accesses when it was asked to composite with a - * window as the source. It would create a pixman image pointing - * to some bogus position in memory, but then set a clip region - * to the position where the actual bits were. - * - * Due to a bug in old versions of pixman, where it would not clip - * against the image bounds when a clip region was set, this would - * actually work. So when the pixman bug was fixed, a workaround was - * added to allow certain out-of-bound accesses. This function disabled - * those workarounds. - * - * Since 0.21.2, pixman doesn't do these workarounds anymore, so now this - * function is a no-op. - */ -PIXMAN_API -void pixman_disable_out_of_bounds_workaround (void); - -/* - * Glyphs - */ -typedef struct pixman_glyph_cache_t pixman_glyph_cache_t; -typedef struct -{ - int x, y; - const void *glyph; -} pixman_glyph_t; - -PIXMAN_API -pixman_glyph_cache_t *pixman_glyph_cache_create (void); - -PIXMAN_API -void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache); - -PIXMAN_API -void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache); - -PIXMAN_API -void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache); - -PIXMAN_API -const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key); - -PIXMAN_API -const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key, - int origin_x, - int origin_y, - pixman_image_t *glyph_image); - -PIXMAN_API -void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, - void *font_key, - void *glyph_key); - -PIXMAN_API -void pixman_glyph_get_extents (pixman_glyph_cache_t *cache, - int n_glyphs, - pixman_glyph_t *glyphs, - pixman_box32_t *extents); - -PIXMAN_API -pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs); - -PIXMAN_API -void pixman_composite_glyphs (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *dest, - pixman_format_code_t mask_format, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height, - pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs); - -PIXMAN_API -void pixman_composite_glyphs_no_mask (pixman_op_t op, - pixman_image_t *src, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t dest_x, - int32_t dest_y, - pixman_glyph_cache_t *cache, - int n_glyphs, - const pixman_glyph_t *glyphs); - -/* - * Trapezoids - */ -typedef struct pixman_edge pixman_edge_t; -typedef struct pixman_trapezoid pixman_trapezoid_t; -typedef struct pixman_trap pixman_trap_t; -typedef struct pixman_span_fix pixman_span_fix_t; -typedef struct pixman_triangle pixman_triangle_t; - -/* - * An edge structure. This represents a single polygon edge - * and can be quickly stepped across small or large gaps in the - * sample grid - */ -struct pixman_edge -{ - pixman_fixed_t x; - pixman_fixed_t e; - pixman_fixed_t stepx; - pixman_fixed_t signdx; - pixman_fixed_t dy; - pixman_fixed_t dx; - - pixman_fixed_t stepx_small; - pixman_fixed_t stepx_big; - pixman_fixed_t dx_small; - pixman_fixed_t dx_big; -}; - -struct pixman_trapezoid -{ - pixman_fixed_t top, bottom; - pixman_line_fixed_t left, right; -}; - -struct pixman_triangle -{ - pixman_point_fixed_t p1, p2, p3; -}; - -/* whether 't' is a well defined not obviously empty trapezoid */ -#define pixman_trapezoid_valid(t) \ - ((t)->left.p1.y != (t)->left.p2.y && \ - (t)->right.p1.y != (t)->right.p2.y && \ - ((t)->bottom > (t)->top)) - -struct pixman_span_fix -{ - pixman_fixed_t l, r, y; -}; - -struct pixman_trap -{ - pixman_span_fix_t top, bot; -}; - -PIXMAN_API -pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y, - int bpp); - -PIXMAN_API -pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y, - int bpp); - -PIXMAN_API -void pixman_edge_step (pixman_edge_t *e, - int n); - -PIXMAN_API -void pixman_edge_init (pixman_edge_t *e, - int bpp, - pixman_fixed_t y_start, - pixman_fixed_t x_top, - pixman_fixed_t y_top, - pixman_fixed_t x_bot, - pixman_fixed_t y_bot); - -PIXMAN_API -void pixman_line_fixed_edge_init (pixman_edge_t *e, - int bpp, - pixman_fixed_t y, - const pixman_line_fixed_t *line, - int x_off, - int y_off); - -PIXMAN_API -void pixman_rasterize_edges (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b); - -PIXMAN_API -void pixman_add_traps (pixman_image_t *image, - int16_t x_off, - int16_t y_off, - int ntrap, - const pixman_trap_t *traps); - -PIXMAN_API -void pixman_add_trapezoids (pixman_image_t *image, - int16_t x_off, - int y_off, - int ntraps, - const pixman_trapezoid_t *traps); - -PIXMAN_API -void pixman_rasterize_trapezoid (pixman_image_t *image, - const pixman_trapezoid_t *trap, - int x_off, - int y_off); - -PIXMAN_API -void pixman_composite_trapezoids (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_traps, - const pixman_trapezoid_t * traps); - -PIXMAN_API -void pixman_composite_triangles (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_tris, - const pixman_triangle_t * tris); - -PIXMAN_API -void pixman_add_triangles (pixman_image_t *image, - int32_t x_off, - int32_t y_off, - int n_tris, - const pixman_triangle_t *tris); - -PIXMAN_END_DECLS - -#endif /* PIXMAN_H__ */ diff --git a/vendor/pixman/pixman/rounding.txt b/vendor/pixman/pixman/rounding.txt deleted file mode 100644 index 1c00019b6..000000000 --- a/vendor/pixman/pixman/rounding.txt +++ /dev/null @@ -1,168 +0,0 @@ -*** General notes about rounding - -Suppose a function is sampled at positions [k + o] where k is an -integer and o is a fractional offset 0 <= o < 1. - -To round a value to the nearest sample, breaking ties by rounding up, -we can do this: - - round(x) = floor(x - o + 0.5) + o - -That is, first subtract o to let us pretend that the samples are at -integer coordinates, then add 0.5 and floor to round to nearest -integer, then add the offset back in. - -To break ties by rounding down: - - round(x) = ceil(x - o - 0.5) + o - -or if we have an epsilon value: - - round(x) = floor(x - o + 0.5 - e) + o - -To always round *up* to the next sample: - - round_up(x) = ceil(x - o) + o - -To always round *down* to the previous sample: - - round_down(x) = floor(x - o) + o - -If a set of samples is stored in an array, you get from the sample -position to an index by subtracting the position of the first sample -in the array: - - index(s) = s - first_sample - - -*** Application to pixman - -In pixman, images are sampled with o = 0.5, that is, pixels are -located midways between integers. We usually break ties by rounding -down (i.e., "round towards north-west"). - - --- NEAREST filtering: - -The NEAREST filter simply picks the closest pixel to the given -position: - - round(x) = floor(x - 0.5 + 0.5 - e) + 0.5 = floor (x - e) + 0.5 - -The first sample of a pixman image has position 0.5, so to find the -index in the pixel array, we have to subtract 0.5: - - floor (x - e) + 0.5 - 0.5 = floor (x - e). - -Therefore a 16.16 fixed-point image location is turned into a pixel -value with NEAREST filtering by doing this: - - pixels[((y - e) >> 16) * stride + ((x - e) >> 16)] - -where stride is the number of pixels allocated per scanline and e = -0x0001. - - --- CONVOLUTION filtering: - -A convolution matrix is considered a sampling of a function f at -values surrounding 0. For example, this convolution matrix: - - [a, b, c, d] - -is interpreted as the values of a function f: - - a = f(-1.5) - b = f(-0.5) - c = f(0.5) - d = f(1.5) - -The sample offset in this case is o = 0.5 and the first sample has -position s0 = -1.5. If the matrix is: - - [a, b, c, d, e] - -the sample offset is o = 0 and the first sample has position s0 = --2.0. In general we have - - s0 = (- width / 2.0 + 0.5). - -and - - o = frac (s0) - -To evaluate f at a position between the samples, we round to the -closest sample, and then we subtract the position of the first sample -to get the index in the matrix: - - f(t) = matrix[floor(t - o + 0.5) + o - s0] - -Note that in this case we break ties by rounding up. - -If we write s0 = m + o, where m is an integer, this is equivalent to - - f(t) = matrix[floor(t - o + 0.5) + o - (m + o)] - = matrix[floor(t - o + 0.5 - m) + o - o] - = matrix[floor(t - s0 + 0.5)] - -The convolution filter in pixman positions f such that 0 aligns with -the given position x. For a given pixel x0 in the image, the closest -sample of f is then computed by taking (x - x0) and rounding that to -the closest index: - - i = floor ((x0 - x) - s0 + 0.5) - -To perform the convolution, we have to find the first pixel x0 whose -corresponding sample has index 0. We can write x0 = k + 0.5, where k -is an integer: - - 0 = floor(k + 0.5 - x - s0 + 0.5) - - = k + floor(1 - x - s0) - - = k - ceil(x + s0 - 1) - - = k - floor(x + s0 - e) - - = k - floor(x - (width - 1) / 2.0 - e) - -And so the final formula for the index k of x0 in the image is: - - k = floor(x - (width - 1) / 2.0 - e) - -Computing the result is then simply a matter of convolving all the -pixels starting at k with all the samples in the matrix. - - ---- SEPARABLE_CONVOLUTION - -For this filter, x is first rounded to one of n regularly spaced -subpixel positions. This subpixel position determines which of n -convolution matrices is being used. - -Then, as in a regular convolution filter, the first pixel to be used -is determined: - - k = floor (x - (width - 1) / 2.0 - e) - -and then the image pixels starting there are convolved with the chosen -matrix. If we write x = xi + frac, where xi is an integer, we get - - k = xi + floor (frac - (width - 1) / 2.0 - e) - -so the location of k relative to x is given by: - - (k + 0.5 - x) = xi + floor (frac - (width - 1) / 2.0 - e) + 0.5 - x - - = floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac - -which means the contents of the matrix corresponding to (frac) should -contain width samplings of the function, with the first sample at: - - floor (frac - (width - 1) / 2.0 - e) + 0.5 - frac - = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac - -This filter is called separable because each of the k x k convolution -matrices is specified with two k-wide vectors, one for each dimension, -where each entry in the matrix is computed as the product of the -corresponding entries in the vectors. diff --git a/vendor/pixman/pixman/solaris-hwcap.mapfile b/vendor/pixman/pixman/solaris-hwcap.mapfile deleted file mode 100644 index 87efce1e3..000000000 --- a/vendor/pixman/pixman/solaris-hwcap.mapfile +++ /dev/null @@ -1,30 +0,0 @@ -############################################################################### -# -# Copyright 2009, Oracle and/or its affiliates. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# -############################################################################### -# -# Override the linker's detection of CMOV/MMX/SSE instructions so this -# library isn't flagged as only usable on CPU's with those ISA's, since it -# checks at runtime for availability before calling them - -hwcap_1 = V0x0 FPU OVERRIDE;