diff --git a/kmeans.c b/kmeans.c index f596009..7ee273d 100644 --- a/kmeans.c +++ b/kmeans.c @@ -1,20 +1,6 @@ /* -© 2011-2016 by Kornel Lesiński. - -This file is part of libimagequant. - -libimagequant is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -libimagequant is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with libimagequant. If not, see . +** © 2011-2016 by Kornel Lesiński. +** See COPYRIGHT file for license. */ #include "libimagequant.h" @@ -80,14 +66,14 @@ LIQ_PRIVATE void kmeans_finalize(colormap *map, const unsigned int max_threads, LIQ_PRIVATE double kmeans_do_iteration(histogram *hist, colormap *const map, kmeans_callback callback) { const unsigned int max_threads = omp_get_max_threads(); - kmeans_state average_color[(KMEANS_CACHE_LINE_GAP+map->colors) * max_threads]; + LIQ_ARRAY(kmeans_state, average_color, (KMEANS_CACHE_LINE_GAP+map->colors) * max_threads); kmeans_init(map, max_threads, average_color); struct nearest_map *const n = nearest_init(map); hist_item *const achv = hist->achv; const int hist_size = hist->size; double total_diff=0; - #pragma omp parallel for if (hist_size > 3000) \ + #pragma omp parallel for if (hist_size > 2000) \ schedule(static) default(none) shared(average_color,callback) reduction(+:total_diff) for(int j=0; j < hist_size; j++) { float diff; diff --git a/libimagequant.c b/libimagequant.c index 08f6962..3506564 100644 --- a/libimagequant.c +++ b/libimagequant.c @@ -1,31 +1,9 @@ /* -** © 2009-2016 by Kornel Lesiński. +** © 2009-2018 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. ** -** This file is part of libimagequant. -** -** libimagequant is free software: you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation, either version 3 of the License, or -** (at your option) any later version. -** -** libimagequant is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with libimagequant. If not, see . -*/ -/* Copyright (C) 1989, 1991 by Jef Poskanzer. -** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by -** Stefan Schneider. -** -** Permission to use, copy, modify, and distribute this software and its -** documentation for any purpose and without fee is hereby granted, provided -** that the above copyright notice appear in all copies and that both that -** copyright notice and this permission notice appear in supporting -** documentation. This software is provided "as is" without express or -** implied warranty. +** See COPYRIGHT file for license. */ #include @@ -43,7 +21,9 @@ #ifdef _OPENMP #include +#define LIQ_TEMP_ROW_WIDTH(img_width) (((img_width) | 15) + 1) /* keep alignment & leave space between rows to avoid cache line contention */ #else +#define LIQ_TEMP_ROW_WIDTH(img_width) (img_width) #define omp_get_max_threads() 1 #define omp_get_thread_num() 0 #endif @@ -78,7 +58,8 @@ struct liq_attr { unsigned int max_colors, max_histogram_entries; unsigned int min_posterization_output /* user setting */, min_posterization_input /* speed setting */; unsigned int kmeans_iterations, feedback_loop_trials; - bool last_index_transparent, use_contrast_maps, use_dither_map; + bool last_index_transparent, use_contrast_maps; + unsigned char use_dither_map; unsigned char speed; unsigned char progress_stage1, progress_stage2, progress_stage3; @@ -100,11 +81,12 @@ struct liq_image { rgba_pixel **rows; double gamma; unsigned int width, height; - unsigned char *noise, *edges, *dither_map; + unsigned char *importance_map, *edges, *dither_map; rgba_pixel *pixels, *temp_row; f_pixel *temp_f_row; liq_image_get_rgba_row_callback *row_callback; void *row_callback_user_info; + liq_image *background; float min_opaque_val; f_pixel fixed_colors[256]; unsigned short fixed_colors_count; @@ -124,7 +106,8 @@ typedef struct liq_remapping_result { liq_palette int_palette; double gamma, palette_error; float dither_level; - bool use_dither_map; unsigned char progress_stage1; + unsigned char use_dither_map; + unsigned char progress_stage1; } liq_remapping_result; struct liq_result { @@ -141,7 +124,7 @@ struct liq_result { float dither_level; double gamma, palette_error; int min_posterization_output; - bool use_dither_map; + unsigned char use_dither_map; }; struct liq_histogram { @@ -161,6 +144,7 @@ static void modify_alpha(liq_image *input_image, rgba_pixel *const row_pixels) L static void contrast_maps(liq_image *image) LIQ_NONNULL; static liq_error finalize_histogram(liq_histogram *input_hist, liq_attr *options, histogram **hist_output) LIQ_NONNULL; static const rgba_pixel *liq_image_get_row_rgba(liq_image *input_image, unsigned int row) LIQ_NONNULL; +static bool liq_image_get_row_f_init(liq_image *img) LIQ_NONNULL; static const f_pixel *liq_image_get_row_f(liq_image *input_image, unsigned int row) LIQ_NONNULL; static void liq_remapping_result_destroy(liq_remapping_result *result) LIQ_NONNULL; static liq_error pngquant_quantize(histogram *hist, const liq_attr *options, const int fixed_colors_count, const f_pixel fixed_colors[], const double gamma, bool fixed_result_colors, liq_result **) LIQ_NONNULL; @@ -174,7 +158,7 @@ LIQ_NONNULL static void liq_verbose_printf(const liq_attr *context, const char * int required_space = vsnprintf(NULL, 0, fmt, va)+1; // +\0 va_end(va); - char buf[required_space]; + LIQ_ARRAY(char, buf, required_space); va_start(va, fmt); vsnprintf(buf, required_space, fmt, va); va_end(va); @@ -210,8 +194,13 @@ LIQ_NONNULL static bool liq_remap_progress(const liq_remapping_result *quant, co #if USE_SSE inline static bool is_sse_available() { -#if (defined(__x86_64__) || defined(__amd64)) +#if (defined(__x86_64__) || defined(__amd64) || defined(_WIN64)) return true; +#elif _MSC_VER + int info[4]; + __cpuid(info, 1); + /* bool is implemented as a built-in type of size 1 in MSVC */ + return info[3] & (1<<26) ? true : false; #else int a,b,c,d; cpuid(1, a, b, c, d); @@ -348,7 +337,8 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_set_speed(liq_attr* attr, int speed) if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return LIQ_INVALID_POINTER; if (speed < 1 || speed > 10) return LIQ_VALUE_OUT_OF_RANGE; - unsigned int iterations = MAX(8-speed, 0); iterations += iterations * iterations/2; + unsigned int iterations = MAX(8-speed, 0); + iterations += iterations * iterations/2; attr->kmeans_iterations = iterations; attr->kmeans_iteration_limit = 1.0/(double)(1<<(23-speed)); attr->feedback_loop_trials = MAX(56-9*speed, 0); @@ -356,11 +346,16 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_set_speed(liq_attr* attr, int speed) attr->max_histogram_entries = (1<<17) + (1<<18)*(10-speed); attr->min_posterization_input = (speed >= 8) ? 1 : 0; attr->use_dither_map = (speed <= (omp_get_max_threads() > 1 ? 7 : 5)); // parallelized dither map might speed up floyd remapping + if (attr->use_dither_map && speed < 3) { + attr->use_dither_map = 2; // always + } attr->use_contrast_maps = (speed <= 7) || attr->use_dither_map; attr->speed = speed; attr->progress_stage1 = attr->use_contrast_maps ? 20 : 8; - if (attr->feedback_loop_trials < 2) attr->progress_stage1 += 30; + if (attr->feedback_loop_trials < 2) { + attr->progress_stage1 += 30; + } attr->progress_stage3 = 50 / (1+speed); attr->progress_stage2 = 100 - attr->progress_stage1 - attr->progress_stage3; return LIQ_OK; @@ -400,7 +395,7 @@ LIQ_EXPORT LIQ_NONNULL int liq_get_min_opacity(const liq_attr *attr) { if (!CHECK_STRUCT_TYPE(attr, liq_attr)) return -1; - return MIN(255, 256.0 * attr->min_opaque_val); + return MIN(255.f, 256.f * attr->min_opaque_val); } LIQ_EXPORT LIQ_NONNULL void liq_set_last_index_transparent(liq_attr* attr, int is_last) @@ -520,7 +515,7 @@ LIQ_EXPORT liq_attr* liq_attr_create_with_allocator(void* (*custom_malloc)(size_ .target_mse = 0, .max_mse = MAX_DIFF, }; - liq_set_speed(attr, 3); + liq_set_speed(attr, 4); return attr; } @@ -540,7 +535,7 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_image_add_fixed_color(liq_image *img, liq_c return LIQ_OK; } -LIQ_NONNULL static liq_error liq_histogram_add_fixed_color_internal(liq_histogram *hist, f_pixel color) +LIQ_NONNULL static liq_error liq_histogram_add_fixed_color_f(liq_histogram *hist, f_pixel color) { if (hist->fixed_colors_count > 255) return LIQ_UNSUPPORTED; @@ -548,9 +543,24 @@ LIQ_NONNULL static liq_error liq_histogram_add_fixed_color_internal(liq_histogra return LIQ_OK; } +LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma) +{ + if (!CHECK_STRUCT_TYPE(hist, liq_histogram)) return LIQ_INVALID_POINTER; + + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, gamma ? gamma : 0.45455); + const f_pixel px = rgba_to_f(gamma_lut, (rgba_pixel){ + .r = color.r, + .g = color.g, + .b = color.b, + .a = color.a, + }); + return liq_histogram_add_fixed_color_f(hist, px); +} + LIQ_NONNULL static bool liq_image_use_low_memory(liq_image *img) { - img->temp_f_row = img->malloc(sizeof(img->f_pixels[0]) * img->width * omp_get_max_threads()); + img->temp_f_row = img->malloc(sizeof(img->f_pixels[0]) * LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_max_threads()); return img->temp_f_row != NULL; } @@ -586,7 +596,7 @@ static liq_image *liq_image_create_internal(const liq_attr *attr, rgba_pixel* ro }; if (!rows || attr->min_opaque_val < 1.f) { - img->temp_row = attr->malloc(sizeof(img->temp_row[0]) * width * omp_get_max_threads()); + img->temp_row = attr->malloc(sizeof(img->temp_row[0]) * LIQ_TEMP_ROW_WIDTH(width) * omp_get_max_threads()); if (!img->temp_row) return NULL; } @@ -630,6 +640,57 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_memory_ownership(liq_image *img, return LIQ_OK; } +LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image); +LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image); + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_importance_map(liq_image *img, unsigned char importance_map[], size_t buffer_size, enum liq_ownership ownership) { + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (!CHECK_USER_POINTER(importance_map)) return LIQ_INVALID_POINTER; + + const size_t required_size = img->width * img->height; + if (buffer_size < required_size) { + return LIQ_BUFFER_TOO_SMALL; + } + + if (ownership == LIQ_COPY_PIXELS) { + unsigned char *tmp = img->malloc(required_size); + if (!tmp) { + return LIQ_OUT_OF_MEMORY; + } + memcpy(tmp, importance_map, required_size); + importance_map = tmp; + } else if (ownership != LIQ_OWN_PIXELS) { + return LIQ_UNSUPPORTED; + } + + liq_image_free_importance_map(img); + img->importance_map = importance_map; + + return LIQ_OK; +} + +LIQ_EXPORT LIQ_NONNULL liq_error liq_image_set_background(liq_image *img, liq_image *background) +{ + if (!CHECK_STRUCT_TYPE(img, liq_image)) return LIQ_INVALID_POINTER; + if (!CHECK_STRUCT_TYPE(background, liq_image)) return LIQ_INVALID_POINTER; + + if (background->background) { + return LIQ_UNSUPPORTED; + } + if (img->width != background->width || img->height != background->height) { + return LIQ_BUFFER_TOO_SMALL; + } + + if (img->background) { + liq_image_destroy(img->background); + } + + img->background = background; + liq_image_free_maps(img); // Force them to be re-analyzed with the background + + return LIQ_OK; +} + LIQ_NONNULL static bool check_image_size(const liq_attr *attr, const int width, const int height) { if (!CHECK_STRUCT_TYPE(attr, liq_attr)) { @@ -730,7 +791,7 @@ LIQ_NONNULL static const rgba_pixel *liq_image_get_row_rgba(liq_image *img, unsi } assert(img->temp_row); - rgba_pixel *temp_row = img->temp_row + img->width * omp_get_thread_num(); + rgba_pixel *temp_row = img->temp_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num(); if (img->rows) { memcpy(temp_row, img->rows[row], img->width * sizeof(temp_row[0])); } else { @@ -753,31 +814,40 @@ LIQ_NONNULL static void convert_row_to_f(liq_image *img, f_pixel *row_f_pixels, } } +LIQ_NONNULL static bool liq_image_get_row_f_init(liq_image *img) +{ + assert(omp_get_thread_num() == 0); + if (img->f_pixels) { + return true; + } + if (!liq_image_should_use_low_memory(img, false)) { + img->f_pixels = img->malloc(sizeof(img->f_pixels[0]) * img->width * img->height); + } + if (!img->f_pixels) { + return liq_image_use_low_memory(img); + } + + if (!liq_image_has_rgba_pixels(img)) { + return false; + } + + float gamma_lut[256]; + to_f_set_gamma(gamma_lut, img->gamma); + for(unsigned int i=0; i < img->height; i++) { + convert_row_to_f(img, &img->f_pixels[i*img->width], i, gamma_lut); + } + return true; +} + LIQ_NONNULL static const f_pixel *liq_image_get_row_f(liq_image *img, unsigned int row) { if (!img->f_pixels) { - if (img->temp_f_row) { - float gamma_lut[256]; - to_f_set_gamma(gamma_lut, img->gamma); - f_pixel *row_for_thread = img->temp_f_row + img->width * omp_get_thread_num(); - convert_row_to_f(img, row_for_thread, row, gamma_lut); - return row_for_thread; - } - - assert(omp_get_thread_num() == 0); - if (!liq_image_should_use_low_memory(img, false)) { - img->f_pixels = img->malloc(sizeof(img->f_pixels[0]) * img->width * img->height); - } - if (!img->f_pixels) { - if (!liq_image_use_low_memory(img)) return NULL; - return liq_image_get_row_f(img, row); - } - + assert(img->temp_f_row); // init should have done that float gamma_lut[256]; to_f_set_gamma(gamma_lut, img->gamma); - for(unsigned int i=0; i < img->height; i++) { - convert_row_to_f(img, &img->f_pixels[i*img->width], i, gamma_lut); - } + f_pixel *row_for_thread = img->temp_f_row + LIQ_TEMP_ROW_WIDTH(img->width) * omp_get_thread_num(); + convert_row_to_f(img, row_for_thread, row, gamma_lut); + return row_for_thread; } return img->f_pixels + img->width * row; } @@ -818,23 +888,34 @@ LIQ_NONNULL static void liq_image_free_rgba_source(liq_image *input_image) } } +LIQ_NONNULL static void liq_image_free_importance_map(liq_image *input_image) { + if (input_image->importance_map) { + input_image->free(input_image->importance_map); + input_image->importance_map = NULL; + } +} + +LIQ_NONNULL static void liq_image_free_maps(liq_image *input_image) { + liq_image_free_importance_map(input_image); + + if (input_image->edges) { + input_image->free(input_image->edges); + input_image->edges = NULL; + } + + if (input_image->dither_map) { + input_image->free(input_image->dither_map); + input_image->dither_map = NULL; + } +} + LIQ_EXPORT LIQ_NONNULL void liq_image_destroy(liq_image *input_image) { if (!CHECK_STRUCT_TYPE(input_image, liq_image)) return; liq_image_free_rgba_source(input_image); - if (input_image->noise) { - input_image->free(input_image->noise); - } - - if (input_image->edges) { - input_image->free(input_image->edges); - } - - if (input_image->dither_map) { - input_image->free(input_image->dither_map); - } + liq_image_free_maps(input_image); if (input_image->f_pixels) { input_image->free(input_image->f_pixels); @@ -848,6 +929,10 @@ LIQ_EXPORT LIQ_NONNULL void liq_image_destroy(liq_image *input_image) input_image->free(input_image->temp_f_row); } + if (input_image->background) { + liq_image_destroy(input_image->background); + } + input_image->magic_header = liq_freed_magic; input_image->free(input_image); } @@ -1077,7 +1162,7 @@ LIQ_NONNULL static void sort_palette(colormap *map, const liq_attr *options) */ if (options->last_index_transparent) { for(unsigned int i=0; i < map->colors; i++) { - if (map->palette[i].acolor.a < 1.0/256.0) { + if (map->palette[i].acolor.a < 1.f/256.f) { const unsigned int old = i, transparent_dest = map->colors-1; SWAP_PALETTE(map, transparent_dest, old); @@ -1100,7 +1185,7 @@ LIQ_NONNULL static void sort_palette(colormap *map, const liq_attr *options) /* move transparent colors to the beginning to shrink trns chunk */ unsigned int num_transparent = 0; for(unsigned int i = 0; i < non_fixed_colors; i++) { - if (map->palette[i].acolor.a < 255.0/256.0) { + if (map->palette[i].acolor.a < 255.f/256.f) { // current transparent color is swapped with earlier opaque one if (i != num_transparent) { SWAP_PALETTE(map, num_transparent, i); @@ -1174,24 +1259,37 @@ LIQ_NONNULL static float remap_to_palette(liq_image *const input_image, unsigned const unsigned int cols = input_image->width; double remapping_error=0; - if (!liq_image_get_row_f(input_image, 0)) { // trigger lazy conversion + if (!liq_image_get_row_f_init(input_image)) { + return -1; + } + if (input_image->background && !liq_image_get_row_f_init(input_image->background)) { return -1; } + const colormap_item *acolormap = map->palette; + struct nearest_map *const n = nearest_init(map); + const int transparent_index = input_image->background ? nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0; + const unsigned int max_threads = omp_get_max_threads(); - kmeans_state average_color[(KMEANS_CACHE_LINE_GAP+map->colors) * max_threads]; + LIQ_ARRAY(kmeans_state, average_color, (KMEANS_CACHE_LINE_GAP+map->colors) * max_threads); kmeans_init(map, max_threads, average_color); #pragma omp parallel for if (rows*cols > 3000) \ - schedule(static) default(none) shared(average_color) reduction(+:remapping_error) + schedule(static) default(none) shared(acolormap) shared(average_color) reduction(+:remapping_error) for(int row = 0; row < rows; ++row) { const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row); + const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? liq_image_get_row_f(input_image->background, row) : NULL; + unsigned int last_match=0; for(unsigned int col = 0; col < cols; ++col) { float diff; - output_pixels[row][col] = last_match = nearest_search(n, &row_pixels[col], last_match, &diff); + last_match = nearest_search(n, &row_pixels[col], last_match, &diff); + if (bg_pixels && colordifference(bg_pixels[col], acolormap[last_match].acolor) <= diff) { + last_match = transparent_index; + } + output_pixels[row][col] = last_match; remapping_error += diff; kmeans_update_color(row_pixels[col], 1.0, map, last_match, omp_get_thread_num(), average_color); @@ -1226,25 +1324,25 @@ inline static f_pixel get_dithered_pixel(const float dither_level, const float m else { if (px.b + sb < max_underflow) ratio = MIN(ratio, (max_underflow-px.b)/sb); } float a = px.a + sa; - if (a > 1.0) { a = 1.0; } + if (a > 1.f) { a = 1.f; } else if (a < 0) { a = 0; } // If dithering error is crazy high, don't propagate it that much // This prevents crazy geen pixels popping out of the blue (or red or black! ;) const float dither_error = sr*sr + sg*sg + sb*sb + sa*sa; if (dither_error > max_dither_error) { - ratio *= 0.8; + ratio *= 0.8f; } else if (dither_error < 2.f/256.f/256.f) { // don't dither areas that don't have noticeable error — makes file smaller return px; - } + } - return (f_pixel){ - .r=px.r + sr * ratio, - .g=px.g + sg * ratio, - .b=px.b + sb * ratio, - .a=a, - }; + return (f_pixel) { + .r=px.r + sr * ratio, + .g=px.g + sg * ratio, + .b=px.b + sb * ratio, + .a=a, + }; } /** @@ -1260,25 +1358,32 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned const colormap *map = quant->palette; const colormap_item *acolormap = map->palette; + if (!liq_image_get_row_f_init(input_image)) { + return false; + } + if (input_image->background && !liq_image_get_row_f_init(input_image->background)) { + return false; + } + /* Initialize Floyd-Steinberg error vectors. */ - f_pixel *restrict thiserr, *restrict nexterr; - const size_t errsize = (cols + 2) * sizeof(*thiserr) * 2; - thiserr = input_image->malloc(errsize); // +2 saves from checking out of bounds access + const size_t errwidth = cols+2; + f_pixel *restrict thiserr = input_image->malloc(errwidth * sizeof(thiserr[0]) * 2); // +2 saves from checking out of bounds access if (!thiserr) return false; - memset(thiserr, 0, errsize); - nexterr = thiserr + (cols + 2); + f_pixel *restrict nexterr = thiserr + errwidth; + memset(thiserr, 0, errwidth * sizeof(thiserr[0])); bool ok = true; struct nearest_map *const n = nearest_init(map); + const int transparent_index = input_image->background ? nearest_search(n, &(f_pixel){0,0,0,0}, 0, NULL) : 0; // response to this value is non-linear and without it any value < 0.8 would give almost no dithering float base_dithering_level = quant->dither_level; - base_dithering_level = 1.0 - (1.0-base_dithering_level)*(1.0-base_dithering_level); + base_dithering_level = 1.f - (1.f-base_dithering_level)*(1.f-base_dithering_level); if (dither_map) { - base_dithering_level *= 1.0/255.0; // convert byte to float + base_dithering_level *= 1.f/255.f; // convert byte to float } - base_dithering_level *= 15.0/16.0; // prevent small errors from accumulating + base_dithering_level *= 15.f/16.f; // prevent small errors from accumulating int fs_direction = 1; unsigned int last_match=0; @@ -1288,10 +1393,11 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned break; } - memset(nexterr, 0, (cols + 2) * sizeof(*nexterr)); + memset(nexterr, 0, errwidth * sizeof(nexterr[0])); int col = (fs_direction > 0) ? 0 : (cols - 1); const f_pixel *const row_pixels = liq_image_get_row_f(input_image, row); + const f_pixel *const bg_pixels = input_image->background && acolormap[transparent_index].acolor.a < 1.f/256.f ? liq_image_get_row_f(input_image->background, row) : NULL; do { float dither_level = base_dithering_level; @@ -1302,9 +1408,16 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned const f_pixel spx = get_dithered_pixel(dither_level, max_dither_error, thiserr[col + 1], row_pixels[col]); const unsigned int guessed_match = output_image_is_remapped ? output_pixels[row][col] : last_match; - output_pixels[row][col] = last_match = nearest_search(n, &spx, guessed_match, NULL); + float diff; + last_match = nearest_search(n, &spx, guessed_match, &diff); + f_pixel output_px = acolormap[last_match].acolor; + if (bg_pixels && colordifference(bg_pixels[col], output_px) <= diff) { + output_px = bg_pixels[col]; + output_pixels[row][col] = transparent_index; + } else { + output_pixels[row][col] = last_match; + } - const f_pixel output_px = acolormap[last_match].acolor; f_pixel err = { .r = (spx.r - output_px.r), .g = (spx.g - output_px.g), @@ -1315,10 +1428,10 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned // If dithering error is crazy high, don't propagate it that much // This prevents crazy geen pixels popping out of the blue (or red or black! ;) if (err.r*err.r + err.g*err.g + err.b*err.b + err.a*err.a > max_dither_error) { - err.r *= 0.75; - err.g *= 0.75; - err.b *= 0.75; - err.a *= 0.75; + err.r *= 0.75f; + err.g *= 0.75f; + err.b *= 0.75f; + err.a *= 0.75f; } /* Propagate Floyd-Steinberg error terms. */ @@ -1370,7 +1483,7 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned if (fs_direction > 0) { if (col >= cols) break; } else { - if (col <= 0) break; + if (col < 0) break; } } while(1); @@ -1389,7 +1502,7 @@ LIQ_NONNULL static bool remap_to_palette_floyd(liq_image *input_image, unsigned /* fixed colors are always included in the palette, so it would be wasteful to duplicate them in palette from histogram */ LIQ_NONNULL static void remove_fixed_colors_from_histogram(histogram *hist, const int fixed_colors_count, const f_pixel fixed_colors[], const float target_mse) { - const float max_difference = MAX(target_mse/2.0, 2.0/256.0/256.0); + const float max_difference = MAX(target_mse/2.f, 2.f/256.f/256.f); if (fixed_colors_count) { for(int j=0; j < hist->size; j++) { for(unsigned int i=0; i < fixed_colors_count; i++) { @@ -1461,26 +1574,28 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_image(liq_histogram *input_hi const unsigned int cols = input_image->width, rows = input_image->height; - if (!input_image->noise && options->use_contrast_maps) { + if (!input_image->importance_map && options->use_contrast_maps) { contrast_maps(input_image); } input_hist->gamma = input_image->gamma; for(int i = 0; i < input_image->fixed_colors_count; i++) { - liq_error res = liq_histogram_add_fixed_color_internal(input_hist, input_image->fixed_colors[i]); + liq_error res = liq_histogram_add_fixed_color_f(input_hist, input_image->fixed_colors[i]); if (res != LIQ_OK) { return res; } } - /* - ** Step 2: attempt to make a histogram of the colors, unclustered. - ** If at first we don't succeed, increase ignorebits to increase color - ** coherence and try again. - */ + /* + ** Step 2: attempt to make a histogram of the colors, unclustered. + ** If at first we don't succeed, increase ignorebits to increase color + ** coherence and try again. + */ - if (liq_progress(options, options->progress_stage1 * 0.4f)) return LIQ_ABORTED; + if (liq_progress(options, options->progress_stage1 * 0.4f)) { + return LIQ_ABORTED; + } const bool all_rows_at_once = liq_image_can_use_rgba_rows(input_image); @@ -1498,11 +1613,11 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_image(liq_histogram *input_hi for(unsigned int row=0; row < rows; row++) { bool added_ok; if (all_rows_at_once) { - added_ok = pam_computeacolorhash(input_hist->acht, (const rgba_pixel *const *)input_image->rows, cols, rows, input_image->noise); + added_ok = pam_computeacolorhash(input_hist->acht, (const rgba_pixel *const *)input_image->rows, cols, rows, input_image->importance_map); if (added_ok) break; } else { const rgba_pixel* rows_p[1] = { liq_image_get_row_rgba(input_image, row) }; - added_ok = pam_computeacolorhash(input_hist->acht, rows_p, cols, 1, input_image->noise ? &input_image->noise[row * cols] : NULL); + added_ok = pam_computeacolorhash(input_hist->acht, rows_p, cols, 1, input_image->importance_map ? &input_image->importance_map[row * cols] : NULL); } if (!added_ok) { input_hist->ignorebits++; @@ -1517,10 +1632,7 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_histogram_add_image(liq_histogram *input_hi input_hist->had_image_added = true; - if (input_image->noise) { - input_image->free(input_image->noise); - input_image->noise = NULL; - } + liq_image_free_importance_map(input_image); if (input_image->free_pixels && input_image->f_pixels) { liq_image_free_rgba_source(input_image); // bow can free the RGBA source if copy has been made in f_pixels @@ -1578,7 +1690,7 @@ LIQ_NONNULL static void modify_alpha(liq_image *input_image, rgba_pixel *const r /** Builds two maps: - noise - approximation of areas with high-frequency noise, except straight edges. 1=flat, 0=noisy. + importance_map - approximation of areas with high-frequency noise, except straight edges. 1=flat, 0=noisy. edges - noise map including all edges */ LIQ_NONNULL static void contrast_maps(liq_image *image) @@ -1588,14 +1700,14 @@ LIQ_NONNULL static void contrast_maps(liq_image *image) return; } - unsigned char *restrict noise = image->noise ? image->noise : image->malloc(cols*rows); - image->noise = NULL; + unsigned char *restrict noise = image->importance_map ? image->importance_map : image->malloc(cols*rows); + image->importance_map = NULL; unsigned char *restrict edges = image->edges ? image->edges : image->malloc(cols*rows); image->edges = NULL; unsigned char *restrict tmp = image->malloc(cols*rows); - if (!noise || !edges || !tmp) { + if (!noise || !edges || !tmp || !liq_image_get_row_f_init(image)) { image->free(noise); image->free(edges); image->free(tmp); @@ -1637,11 +1749,11 @@ LIQ_NONNULL static void contrast_maps(liq_image *image) z = 1.f - MAX(z,MIN(horiz,vert)); z *= z; // noise is amplified z *= z; - - z *= 256.f; - noise[j*cols+i] = z < 256 ? z : 255; - z = (1.f-edge)*256.f; - edges[j*cols+i] = z > 0 ? (z < 256 ? z : 255) : 0; + // 85 is about 1/3rd of weight (not 0, because noisy pixels still need to be included, just not as precisely). + const unsigned int z_int = 85 + (unsigned int)(z * 171.f); + noise[j*cols+i] = MIN(z_int, 255); + const int e_int = 255 - (int)(edge * 256.f); + edges[j*cols+i] = e_int > 0 ? MIN(e_int, 255) : 0; } } @@ -1663,7 +1775,7 @@ LIQ_NONNULL static void contrast_maps(liq_image *image) image->free(tmp); - image->noise = noise; + image->importance_map = noise; image->edges = edges; } @@ -1674,7 +1786,7 @@ LIQ_NONNULL static void contrast_maps(liq_image *image) * and peeks 1 pixel above/below. Full 2d algorithm doesn't improve it significantly. * Correct flood fill doesn't have visually good properties. */ -LIQ_NONNULL static void update_dither_map(unsigned char *const *const row_pointers, liq_image *input_image) +LIQ_NONNULL static void update_dither_map(liq_image *input_image, unsigned char *const *const row_pointers, colormap *map) { const unsigned int width = input_image->width; const unsigned int height = input_image->height; @@ -1686,6 +1798,10 @@ LIQ_NONNULL static void update_dither_map(unsigned char *const *const row_pointe for(unsigned int col=1; col < width; col++) { const unsigned char px = row_pointers[row][col]; + if (input_image->background && map->palette[px].acolor.a < 1.f/256.f) { + // Transparency may or may not create an edge. When there's an explicit background set, assume no edge. + continue; + } if (px != lastpixel || col == width-1) { int neighbor_count = 10 * (col-lastcol); @@ -1758,6 +1874,10 @@ static colormap *find_best_palette(histogram *hist, const liq_attr *options, con // at this point actual gamma is not set, so very conservative posterization estimate is used const double target_mse = MIN(max_mse, MAX(options->target_mse, pow((1<min_posterization_output)/1024.0, 2))); int feedback_loop_trials = options->feedback_loop_trials; + if (hist->size > 5000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 25000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 50000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} + if (hist->size > 100000) {feedback_loop_trials = (feedback_loop_trials*3 + 3)/4;} colormap *acolormap = NULL; double least_error = MAX_DIFF; double target_mse_overshoot = feedback_loop_trials>0 ? 1.05 : 1.0; @@ -1767,7 +1887,7 @@ static colormap *find_best_palette(histogram *hist, const liq_attr *options, con colormap *newmap; if (hist->size && fixed_colors_count < max_colors) { newmap = mediancut(hist, max_colors-fixed_colors_count, target_mse * target_mse_overshoot, MAX(MAX(45.0/65536.0, target_mse), least_error)*1.2, - options->malloc, options->free); + options->malloc, options->free); } else { feedback_loop_trials = 0; newmap = NULL; @@ -1864,7 +1984,7 @@ LIQ_NONNULL static liq_error pngquant_quantize(histogram *hist, const liq_attr * } // K-Means iteration approaches local minimum for the palette - const double iteration_limit = options->kmeans_iteration_limit; + double iteration_limit = options->kmeans_iteration_limit; unsigned int iterations = options->kmeans_iterations; if (!iterations && palette_error < 0 && max_mse < MAX_DIFF) iterations = 1; // otherwise total error is never calculated and MSE limit won't work @@ -1877,6 +1997,11 @@ LIQ_NONNULL static liq_error pngquant_quantize(histogram *hist, const liq_attr * } } + if (hist->size > 5000) {iterations = (iterations*3 + 3)/4;} + if (hist->size > 25000) {iterations = (iterations*3 + 3)/4;} + if (hist->size > 50000) {iterations = (iterations*3 + 3)/4;} + if (hist->size > 100000) {iterations = (iterations*3 + 3)/4; iteration_limit *= 2;} + verbose_print(options, " moving colormap towards local minimum"); double previous_palette_error = MAX_DIFF; @@ -1958,7 +2083,7 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image(liq_result *result, li return LIQ_BUFFER_TOO_SMALL; } - unsigned char *rows[input_image->height]; + LIQ_ARRAY(unsigned char *, rows, input_image->height); unsigned char *buffer_bytes = buffer; for(unsigned int i=0; i < input_image->height; i++) { rows[i] = &buffer_bytes[input_image->width * i]; @@ -1998,11 +2123,13 @@ LIQ_EXPORT LIQ_NONNULL liq_error liq_write_remapped_image_rows(liq_result *quant set_rounded_palette(&result->int_palette, result->palette, result->gamma, quant->min_posterization_output); remapping_error = remap_to_palette(input_image, row_pointers, result->palette); } else { - const bool generate_dither_map = result->use_dither_map && (input_image->edges && !input_image->dither_map); + const bool is_image_huge = (input_image->width * input_image->height) > 2000 * 2000; + const bool allow_dither_map = result->use_dither_map == 2 || (!is_image_huge && result->use_dither_map); + const bool generate_dither_map = allow_dither_map && (input_image->edges && !input_image->dither_map); if (generate_dither_map) { // If dithering (with dither map) is required, this image is used to find areas that require dithering remapping_error = remap_to_palette(input_image, row_pointers, result->palette); - update_dither_map(row_pointers, input_image); + update_dither_map(input_image, row_pointers, result->palette); } if (liq_remap_progress(result, result->progress_stage1 * 0.5f)) { diff --git a/libimagequant.h b/libimagequant.h index c86f5a5..c8f84b5 100644 --- a/libimagequant.h +++ b/libimagequant.h @@ -13,8 +13,8 @@ #define LIQ_EXPORT extern #endif -#define LIQ_VERSION 20900 -#define LIQ_VERSION_STRING "2.9.0" +#define LIQ_VERSION 21200 +#define LIQ_VERSION_STRING "2.12.2" #ifndef LIQ_PRIVATE #if defined(__GNUC__) || defined (__llvm__) @@ -60,7 +60,11 @@ typedef enum liq_error { LIQ_UNSUPPORTED, } liq_error; -enum liq_ownership {LIQ_OWN_ROWS=4, LIQ_OWN_PIXELS=8}; +enum liq_ownership { + LIQ_OWN_ROWS=4, + LIQ_OWN_PIXELS=8, + LIQ_COPY_PIXELS=16, +}; typedef struct liq_histogram_entry { liq_color color; @@ -75,6 +79,7 @@ LIQ_EXPORT void liq_attr_destroy(liq_attr *attr) LIQ_NONNULL; LIQ_EXPORT LIQ_USERESULT liq_histogram* liq_histogram_create(const liq_attr* attr); LIQ_EXPORT liq_error liq_histogram_add_image(liq_histogram *hist, const liq_attr *attr, liq_image* image) LIQ_NONNULL; LIQ_EXPORT liq_error liq_histogram_add_colors(liq_histogram *hist, const liq_attr *attr, const liq_histogram_entry entries[], int num_entries, double gamma) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_histogram_add_fixed_color(liq_histogram *hist, liq_color color, double gamma) LIQ_NONNULL; LIQ_EXPORT void liq_histogram_destroy(liq_histogram *hist) LIQ_NONNULL; LIQ_EXPORT liq_error liq_set_max_colors(liq_attr* attr, int colors) LIQ_NONNULL; @@ -107,6 +112,8 @@ typedef void liq_image_get_rgba_row_callback(liq_color row_out[], int row, int w LIQ_EXPORT LIQ_USERESULT liq_image *liq_image_create_custom(const liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma); LIQ_EXPORT liq_error liq_image_set_memory_ownership(liq_image *image, int ownership_flags) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_image_set_background(liq_image *img, liq_image *background_image) LIQ_NONNULL; +LIQ_EXPORT liq_error liq_image_set_importance_map(liq_image *img, unsigned char buffer[], size_t buffer_size, enum liq_ownership memory_handling) LIQ_NONNULL; LIQ_EXPORT liq_error liq_image_add_fixed_color(liq_image *img, liq_color color) LIQ_NONNULL; LIQ_EXPORT LIQ_USERESULT int liq_image_get_width(const liq_image *img) LIQ_NONNULL; LIQ_EXPORT LIQ_USERESULT int liq_image_get_height(const liq_image *img) LIQ_NONNULL; diff --git a/mediancut.c b/mediancut.c index 6e3e590..447a4af 100644 --- a/mediancut.c +++ b/mediancut.c @@ -1,32 +1,9 @@ /* -** © 2009-2015 by Kornel Lesiński. +** © 2009-2018 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. ** -** This file is part of libimagequant. -** -** libimagequant is free software: you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation, either version 3 of the License, or -** (at your option) any later version. -** -** libimagequant is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with libimagequant. If not, see . -*/ -/* -** Copyright (C) 1989, 1991 by Jef Poskanzer. -** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by -** Stefan Schneider. -** -** Permission to use, copy, modify, and distribute this software and its -** documentation for any purpose and without fee is hereby granted, provided -** that the above copyright notice appear in all copies and that both that -** copyright notice and this permission notice appear in supporting -** documentation. This software is provided "as is" without express or -** implied warranty. +** See COPYRIGHT file for license. */ #include @@ -141,7 +118,7 @@ inline static unsigned int qsort_partition(hist_item *const base, const unsigned } /** quick select algorithm */ -static void hist_item_sort_range(hist_item *base, unsigned int len, unsigned int sort_start) +static void hist_item_sort_range(hist_item base[], unsigned int len, unsigned int sort_start) { for(;;) { const unsigned int l = qsort_partition(base, len), r = l+1; @@ -157,7 +134,7 @@ static void hist_item_sort_range(hist_item *base, unsigned int len, unsigned int } /** sorts array to make sum of weights lower than halfvar one side, returns edge between halfvar parts of the set */ -static hist_item *hist_item_sort_halfvar(hist_item *base, unsigned int len, double *const lowervar, const double halfvar) +static hist_item *hist_item_sort_halfvar(hist_item base[], unsigned int len, double *const lowervar, const double halfvar) { do { const unsigned int l = qsort_partition(base, len), r = l+1; @@ -198,7 +175,7 @@ typedef struct { static int comparevariance(const void *ch1, const void *ch2) { return ((const channelvariance*)ch1)->variance > ((const channelvariance*)ch2)->variance ? -1 : - (((const channelvariance*)ch1)->variance < ((const channelvariance*)ch2)->variance ? 1 : 0); + (((const channelvariance*)ch1)->variance < ((const channelvariance*)ch2)->variance ? 1 : 0); } /** Finds which channels need to be sorted first and preproceses achv for fast sort */ @@ -216,12 +193,16 @@ static double prepare_sort(struct box *b, hist_item achv[]) qsort(channels, 4, sizeof(channels[0]), comparevariance); - for(unsigned int i=0; i < b->colors; i++) { - const float *chans = (const float *)&achv[b->ind + i].acolor; + const unsigned int ind1 = b->ind; + const unsigned int colors = b->colors; + #pragma omp parallel for if (colors > 25000) \ + schedule(static) default(none) shared(achv, channels) + for(unsigned int i=0; i < colors; i++) { + const float *chans = (const float *)&achv[ind1 + i].acolor; // Only the first channel really matters. When trying median cut many times // with different histogram weights, I don't want sort randomness to influence outcome. - achv[b->ind + i].tmp.sort_value = ((unsigned int)(chans[channels[0].chan]*65535.0)<<16) | - (unsigned int)((chans[channels[2].chan] + chans[channels[1].chan]/2.0 + chans[channels[3].chan]/4.0)*65535.0); + achv[ind1 + i].tmp.sort_value = ((unsigned int)(chans[channels[0].chan]*65535.0)<<16) | + (unsigned int)((chans[channels[2].chan] + chans[channels[1].chan]/2.0 + chans[channels[3].chan]/4.0)*65535.0); } const f_pixel median = get_median(b, achv); @@ -229,6 +210,8 @@ static double prepare_sort(struct box *b, hist_item achv[]) // box will be split to make color_weight of each side even const unsigned int ind = b->ind, end = ind+b->colors; double totalvar = 0; + #pragma omp parallel for if (end - ind > 15000) \ + schedule(static) default(shared) reduction(+:totalvar) for(unsigned int j=ind; j < end; j++) totalvar += (achv[j].color_weight = color_weight(median, achv[j])); return totalvar / 2.0; } @@ -251,7 +234,7 @@ static f_pixel get_median(const struct box *b, hist_item achv[]) /* ** Find the best splittable box. -1 if no boxes are splittable. */ -static int best_splittable_box(struct box* bv, unsigned int boxes, const double max_mse) +static int best_splittable_box(struct box bv[], unsigned int boxes, const double max_mse) { int bi=-1; double maxsum=0; for(unsigned int i=0; i < boxes; i++) { @@ -281,8 +264,8 @@ inline static double color_weight(f_pixel median, hist_item h) return sqrt(diff) * (sqrt(1.0+h.adjusted_weight)-1.0); } -static void set_colormap_from_boxes(colormap *map, struct box* bv, unsigned int boxes, hist_item *achv); -static void adjust_histogram(hist_item *achv, const struct box* bv, unsigned int boxes); +static void set_colormap_from_boxes(colormap *map, struct box bv[], unsigned int boxes, hist_item *achv); +static void adjust_histogram(hist_item *achv, const struct box bv[], unsigned int boxes); static double box_error(const struct box *box, const hist_item achv[]) { @@ -326,8 +309,11 @@ static void box_init(struct box *box, const hist_item *achv, const unsigned int box->colors = colors; box->sum = sum; box->total_error = -1; + box->color = averagepixels(colors, &achv[ind]); + #pragma omp task if (colors > 5000) box->variance = box_variance(achv, box); + #pragma omp task if (colors > 8000) box->max_error = box_max_error(achv, box); } @@ -339,67 +325,78 @@ static void box_init(struct box *box, const hist_item *achv, const unsigned int LIQ_PRIVATE colormap *mediancut(histogram *hist, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*)) { hist_item *achv = hist->achv; - struct box bv[newcolors]; + LIQ_ARRAY(struct box, bv, newcolors); + unsigned int boxes = 1; /* ** Set up the initial box. */ + #pragma omp parallel + #pragma omp single + { double sum = 0; - for(unsigned int i=0; i < hist->size; i++) { + for(unsigned int i=0; i < hist->size; i++) { sum += achv[i].adjusted_weight; } - box_init(&bv[0], achv, 0, hist->size, sum); + #pragma omp taskgroup + { + box_init(&bv[0], achv, 0, hist->size, sum); + } - unsigned int boxes = 1; - - /* - ** Main loop: split boxes until we have enough. - */ - while (boxes < newcolors) { - - // first splits boxes that exceed quality limit (to have colors for things like odd green pixel), - // later raises the limit to allow large smooth areas/gradients get colors. - const double current_max_mse = max_mse + (boxes/(double)newcolors)*16.0*max_mse; - const int bi = best_splittable_box(bv, boxes, current_max_mse); - if (bi < 0) - break; /* ran out of colors! */ - - unsigned int indx = bv[bi].ind; - unsigned int clrs = bv[bi].colors; /* - Classic implementation tries to get even number of colors or pixels in each subdivision. - - Here, instead of popularity I use (sqrt(popularity)*variance) metric. - Each subdivision balances number of pixels (popular colors) and low variance - - boxes can be large if they have similar colors. Later boxes with high variance - will be more likely to be split. - - Median used as expected value gives much better results than mean. + ** Main loop: split boxes until we have enough. */ + while (boxes < newcolors) { - const double halfvar = prepare_sort(&bv[bi], achv); - double lowervar=0; + // first splits boxes that exceed quality limit (to have colors for things like odd green pixel), + // later raises the limit to allow large smooth areas/gradients get colors. + const double current_max_mse = max_mse + (boxes/(double)newcolors)*16.0*max_mse; + const int bi = best_splittable_box(bv, boxes, current_max_mse); + if (bi < 0) { + break; /* ran out of colors! */ + } - // hist_item_sort_halfvar sorts and sums lowervar at the same time - // returns item to break at …minus one, which does smell like an off-by-one error. - hist_item *break_p = hist_item_sort_halfvar(&achv[indx], clrs, &lowervar, halfvar); - unsigned int break_at = MIN(clrs-1, break_p - &achv[indx] + 1); + unsigned int indx = bv[bi].ind; + unsigned int clrs = bv[bi].colors; - /* - ** Split the box. - */ - double sm = bv[bi].sum; - double lowersum = 0; - for(unsigned int i=0; i < break_at; i++) lowersum += achv[indx + i].adjusted_weight; + /* + Classic implementation tries to get even number of colors or pixels in each subdivision. - box_init(&bv[bi], achv, bv[bi].ind, break_at, lowersum); - box_init(&bv[boxes], achv, indx + break_at, clrs - break_at, sm - lowersum); + Here, instead of popularity I use (sqrt(popularity)*variance) metric. + Each subdivision balances number of pixels (popular colors) and low variance - + boxes can be large if they have similar colors. Later boxes with high variance + will be more likely to be split. - ++boxes; + Median used as expected value gives much better results than mean. + */ - if (total_box_error_below_target(target_mse, bv, boxes, hist)) { - break; + const double halfvar = prepare_sort(&bv[bi], achv); + double lowervar=0; + + // hist_item_sort_halfvar sorts and sums lowervar at the same time + // returns item to break at …minus one, which does smell like an off-by-one error. + hist_item *break_p = hist_item_sort_halfvar(&achv[indx], clrs, &lowervar, halfvar); + unsigned int break_at = MIN(clrs-1, break_p - &achv[indx] + 1); + + /* + ** Split the box. + */ + double sm = bv[bi].sum; + double lowersum = 0; + for(unsigned int i=0; i < break_at; i++) lowersum += achv[indx + i].adjusted_weight; + + #pragma omp taskgroup + { + box_init(&bv[bi], achv, indx, break_at, lowersum); + box_init(&bv[boxes], achv, indx + break_at, clrs - break_at, sm - lowersum); + } + + ++boxes; + + if (total_box_error_below_target(target_mse, bv, boxes, hist)) { + break; + } } } @@ -446,6 +443,8 @@ static f_pixel averagepixels(unsigned int clrs, const hist_item achv[]) { double r = 0, g = 0, b = 0, a = 0, sum = 0; + #pragma omp parallel for if (clrs > 25000) \ + schedule(static) default(shared) reduction(+:a) reduction(+:r) reduction(+:g) reduction(+:b) reduction(+:sum) for(unsigned int i = 0; i < clrs; i++) { const f_pixel px = achv[i].acolor; const double weight = achv[i].adjusted_weight; diff --git a/mempool.c b/mempool.c index cd345a7..cd49f59 100644 --- a/mempool.c +++ b/mempool.c @@ -1,20 +1,9 @@ /* -© 2011-2016 by Kornel Lesiński. - -This file is part of libimagequant. - -libimagequant is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -libimagequant is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with libimagequant. If not, see . +** © 2009-2017 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. +** +** See COPYRIGHT file for license. */ #include "libimagequant.h" @@ -32,7 +21,7 @@ struct mempool { void (*free)(void*); struct mempool *next; }; -LIQ_PRIVATE void* mempool_create(mempool *mptr, const unsigned int size, unsigned int max_size, void* (*malloc)(size_t), void (*free)(void*)) +LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int max_size, void* (*malloc)(size_t), void (*free)(void*)) { if (*mptr && ((*mptr)->used+size) <= (*mptr)->size) { unsigned int prevused = (*mptr)->used; @@ -40,7 +29,7 @@ LIQ_PRIVATE void* mempool_create(mempool *mptr, const unsigned int size, unsigne return ((char*)(*mptr)) + prevused; } - mempool old = *mptr; + mempoolptr old = *mptr; if (!max_size) max_size = (1<<17); max_size = size+ALIGN_MASK > max_size ? size+ALIGN_MASK : max_size; @@ -60,7 +49,7 @@ LIQ_PRIVATE void* mempool_create(mempool *mptr, const unsigned int size, unsigne return mempool_alloc(mptr, size, size); } -LIQ_PRIVATE void* mempool_alloc(mempool *mptr, const unsigned int size, const unsigned int max_size) +LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int max_size) { if (((*mptr)->used+size) <= (*mptr)->size) { unsigned int prevused = (*mptr)->used; @@ -71,10 +60,10 @@ LIQ_PRIVATE void* mempool_alloc(mempool *mptr, const unsigned int size, const un return mempool_create(mptr, size, max_size, (*mptr)->malloc, (*mptr)->free); } -LIQ_PRIVATE void mempool_destroy(mempool m) +LIQ_PRIVATE void mempool_destroy(mempoolptr m) { while (m) { - mempool next = m->next; + mempoolptr next = m->next; m->free(m); m = next; } diff --git a/mempool.h b/mempool.h index 0797e7c..9b7333b 100644 --- a/mempool.h +++ b/mempool.h @@ -4,10 +4,10 @@ #include struct mempool; -typedef struct mempool *mempool; +typedef struct mempool *mempoolptr; -LIQ_PRIVATE void* mempool_create(mempool *mptr, const unsigned int size, unsigned int capacity, void* (*malloc)(size_t), void (*free)(void*)); -LIQ_PRIVATE void* mempool_alloc(mempool *mptr, const unsigned int size, const unsigned int capacity); -LIQ_PRIVATE void mempool_destroy(mempool m); +LIQ_PRIVATE void* mempool_create(mempoolptr *mptr, const unsigned int size, unsigned int capacity, void* (*malloc)(size_t), void (*free)(void*)); +LIQ_PRIVATE void* mempool_alloc(mempoolptr *mptr, const unsigned int size, const unsigned int capacity); +LIQ_PRIVATE void mempool_destroy(mempoolptr m); #endif diff --git a/nearest.c b/nearest.c index 2f1e926..aeb4dc1 100644 --- a/nearest.c +++ b/nearest.c @@ -1,20 +1,9 @@ /* -© 2011-2015 by Kornel Lesiński. - -This file is part of libimagequant. - -libimagequant is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -libimagequant is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with libimagequant. If not, see . +** © 2009-2015 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. +** +** See COPYRIGHT file for license. */ #include "libimagequant.h" @@ -45,7 +34,7 @@ struct nearest_map { vp_node *root; const colormap_item *palette; float nearest_other_color_dist[256]; - mempool mempool; + mempoolptr mempool; }; static void vp_search_node(const vp_node *node, const f_pixel *const needle, vp_search_tmp *const best_candidate); @@ -56,7 +45,7 @@ static int vp_compare_distance(const void *ap, const void *bp) { return a > b ? 1 : -1; } -static void vp_sort_indexes_by_distance(const f_pixel vantage_point, vp_sort_tmp *indexes, int num_indexes, const colormap_item items[]) { +static void vp_sort_indexes_by_distance(const f_pixel vantage_point, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) { for(int i=0; i < num_indexes; i++) { indexes[i].distance_squared = colordifference(vantage_point, items[indexes[i].idx].acolor); } @@ -66,7 +55,7 @@ static void vp_sort_indexes_by_distance(const f_pixel vantage_point, vp_sort_tmp /* * Usually it should pick farthest point, but picking most popular point seems to make search quicker anyway */ -static int vp_find_best_vantage_point_index(vp_sort_tmp *indexes, int num_indexes, const colormap_item items[]) { +static int vp_find_best_vantage_point_index(vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) { int best = 0; float best_popularity = items[indexes[0].idx].popularity; for(int i = 1; i < num_indexes; i++) { @@ -78,7 +67,7 @@ static int vp_find_best_vantage_point_index(vp_sort_tmp *indexes, int num_indexe return best; } -static vp_node *vp_create_node(mempool *m, vp_sort_tmp *indexes, int num_indexes, const colormap_item items[]) { +static vp_node *vp_create_node(mempoolptr *m, vp_sort_tmp indexes[], int num_indexes, const colormap_item items[]) { if (num_indexes <= 0) { return NULL; } @@ -118,10 +107,10 @@ static vp_node *vp_create_node(mempool *m, vp_sort_tmp *indexes, int num_indexes } LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *map) { - mempool m = NULL; + mempoolptr m = NULL; struct nearest_map *handle = mempool_create(&m, sizeof(handle[0]), sizeof(handle[0]) + sizeof(vp_node)*map->colors+16, map->malloc, map->free); - vp_sort_tmp indexes[map->colors]; + LIQ_ARRAY(vp_sort_tmp, indexes, map->colors); for(unsigned int i=0; i < map->colors; i++) { indexes[i].idx = i; diff --git a/pam.c b/pam.c index 74165e1..660f829 100644 --- a/pam.c +++ b/pam.c @@ -1,16 +1,10 @@ /* pam.c - pam (portable alpha map) utility library ** -** Copyright (C) 1989, 1991 by Jef Poskanzer. -** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by -** Stefan Schneider. -** © 2009-2016 by Kornel Lesinski. +** © 2009-2017 by Kornel Lesiński. +** © 1989, 1991 by Jef Poskanzer. +** © 1997, 2000, 2002 by Greg Roelofs; based on an idea by Stefan Schneider. ** -** Permission to use, copy, modify, and distribute this software and its -** documentation for any purpose and without fee is hereby granted, provided -** that the above copyright notice appear in all copies and that both that -** copyright notice and this permission notice appear in supporting -** documentation. This software is provided "as is" without express or -** implied warranty. +** See COPYRIGHT file for license. */ #include @@ -33,11 +27,8 @@ LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba /* Go through the entire image, building a hash table of colors. */ for(unsigned int row = 0; row < rows; ++row) { - float boost=1.0; for(unsigned int col = 0; col < cols; ++col) { - if (importance_map) { - boost = 0.5f+ (double)*importance_map++/255.f; - } + unsigned int boost; // RGBA color is casted to long for easier hasing/comparisons union rgba_as_int px = {pixels[row][col]}; @@ -45,12 +36,22 @@ LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba if (!px.rgba.a) { // "dirty alpha" has different RGBA values that end up being the same fully transparent color px.l=0; hash=0; - boost = 10; + + boost = 2000; + if (importance_map) { + importance_map++; + } } else { // mask posterizes all 4 channels in one go px.l = (px.l & posterize_mask) | ((px.l & posterize_high_mask) >> (8-ignorebits)); // fancier hashing algorithms didn't improve much hash = px.l % hash_size; + + if (importance_map) { + boost = *importance_map++; + } else { + boost = 255; + } } if (!pam_add_to_hash(acht, hash, boost, px, row, rows)) { @@ -64,7 +65,7 @@ LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba return true; } -LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, float boost, union rgba_as_int px, unsigned int row, unsigned int rows) +LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows) { /* head of the hash function stores first 2 colors inline (achl->used = 1..2), to reduce number of allocations of achl->other_items. @@ -160,7 +161,7 @@ LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, const size_t estimated_colors = MIN(maxcolors, surface/(ignorebits + (surface > 512*512 ? 6 : 5))); const size_t hash_size = estimated_colors < 66000 ? 6673 : (estimated_colors < 200000 ? 12011 : 24019); - mempool m = NULL; + mempoolptr m = NULL; const size_t buckets_size = hash_size * sizeof(struct acolorhist_arr_head); const size_t mempool_size = sizeof(struct acolorhash_table) + buckets_size + estimated_colors * sizeof(struct acolorhist_arr_item); struct acolorhash_table *t = mempool_create(&m, sizeof(*t) + buckets_size, mempool_size, malloc, free); @@ -175,11 +176,15 @@ LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, return t; } -ALWAYS_INLINE static float pam_add_to_hist(const float *gamma_lut, hist_item *achv, unsigned int j, const struct acolorhist_arr_item *entry, const float max_perceptual_weight) +ALWAYS_INLINE static float pam_add_to_hist(const float *gamma_lut, hist_item *achv, unsigned int *j, const struct acolorhist_arr_item *entry, const float max_perceptual_weight) { - achv[j].acolor = rgba_to_f(gamma_lut, entry->color.rgba); - const float w = MIN(entry->perceptual_weight, max_perceptual_weight); - achv[j].adjusted_weight = achv[j].perceptual_weight = w; + if (entry->perceptual_weight == 0) { + return 0; + } + const float w = MIN(entry->perceptual_weight/128.f, max_perceptual_weight); + achv[*j].adjusted_weight = achv[*j].perceptual_weight = w; + achv[*j].acolor = rgba_to_f(gamma_lut, entry->color.rgba); + *j += 1; return w; } @@ -203,22 +208,27 @@ LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table float max_perceptual_weight = 0.1f * acht->cols * acht->rows; double total_weight = 0; - for(unsigned int j=0, i=0; i < acht->hash_size; ++i) { + unsigned int j=0; + for(unsigned int i=0; i < acht->hash_size; ++i) { const struct acolorhist_arr_head *const achl = &acht->buckets[i]; if (achl->used) { - total_weight += pam_add_to_hist(gamma_lut, hist->achv, j++, &achl->inline1, max_perceptual_weight); + total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline1, max_perceptual_weight); if (achl->used > 1) { - total_weight += pam_add_to_hist(gamma_lut, hist->achv, j++, &achl->inline2, max_perceptual_weight); + total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->inline2, max_perceptual_weight); for(unsigned int k=0; k < achl->used-2; k++) { - total_weight += pam_add_to_hist(gamma_lut, hist->achv, j++, &achl->other_items[k], max_perceptual_weight); + total_weight += pam_add_to_hist(gamma_lut, hist->achv, &j, &achl->other_items[k], max_perceptual_weight); } } } } - + hist->size = j; hist->total_perceptual_weight = total_weight; + if (!j) { + pam_freeacolorhist(hist); + return NULL; + } return hist; } diff --git a/pam.h b/pam.h index 6657a14..2ca4327 100644 --- a/pam.h +++ b/pam.h @@ -62,6 +62,12 @@ # define SSE_ALIGN #endif +#ifndef _MSC_VER +#define LIQ_ARRAY(type, var, count) type var[count] +#else +#define LIQ_ARRAY(type, var, count) type* var = (type*)_alloca(sizeof(type)*(count)) +#endif + #if defined(__GNUC__) || defined (__llvm__) #define ALWAYS_INLINE __attribute__((always_inline)) inline #define NEVER_INLINE __attribute__ ((noinline)) @@ -85,7 +91,7 @@ typedef struct { float a, r, g, b; } SSE_ALIGN f_pixel; -static const double internal_gamma = 0.5499; +static const float internal_gamma = 0.5499f; LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma); @@ -238,7 +244,7 @@ typedef struct colormap { struct acolorhist_arr_item { union rgba_as_int color; - float perceptual_weight; + unsigned int perceptual_weight; }; struct acolorhist_arr_head { @@ -260,7 +266,7 @@ LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht); LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*)); LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*)); LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map); -LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, float boost, union rgba_as_int px, unsigned int row, unsigned int rows); +LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows); LIQ_PRIVATE void pam_freeacolorhist(histogram *h);