Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

overlayframe.C

Go to the documentation of this file.
00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <string.h>
00004 #include <stdint.h>
00005 #include <stdlib.h>
00006 #include <unistd.h>
00007 
00008 #include "clip.h"
00009 #include "edl.inc"
00010 #include "mutex.h"
00011 #include "overlayframe.h"
00012 #include "units.h"
00013 #include "vframe.h"
00014 
// Absolute-value helpers, overloaded for every arithmetic type the blending
// macros may be instantiated with.  Most of these are never called, but the
// compiler needs each overload to resolve my_abs() in the macro expansions.
//
// Every overload returns a type as wide as its argument.  Returning plain int
// from the 64 bit and unsigned overloads silently truncated or wrapped large
// magnitudes.

// Signed 32 bit: defer to the C library.
static int my_abs(int32_t x)
{
        return abs(x);
}

// Unsigned values are already non-negative; hand them back unchanged.
static uint32_t my_abs(uint32_t x)
{
        return x;
}

// Signed 64 bit: keep the full 64 bit magnitude.
static int64_t my_abs(int64_t x)
{
        return llabs(x);
}

// Unsigned values are already non-negative; hand them back unchanged.
static uint64_t my_abs(uint64_t x)
{
        return x;
}

// Single precision float.
static float my_abs(float x)
{
        return fabsf(x);
}
00041 
00042 
00043 
00044 
00045 OverlayFrame::OverlayFrame(int cpus)
00046 {
00047         temp_frame = 0;
00048         blend_engine = 0;
00049         scale_engine = 0;
00050         scaletranslate_engine = 0;
00051         translate_engine = 0;
00052         this->cpus = cpus;
00053 }
00054 
00055 OverlayFrame::~OverlayFrame()
00056 {
00057         if(temp_frame) delete temp_frame;
00058         if(scale_engine) delete scale_engine;
00059         if(translate_engine) delete translate_engine;
00060         if(blend_engine) delete blend_engine;
00061         if(scaletranslate_engine) delete scaletranslate_engine;
00062 }
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
00071 // Verification: 
00072 
00073 // (255 * 255 + 0 * 0) / 255 = 255
00074 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
00075 
00076 // (65535 * 65535 + 0 * 0) / 65535 = 65535
00077 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
00078 
00079 
00080 // Branch prediction 4 U
00081 
// BLEND_3: blend one 3-component source pixel into output[0..2] in place.
//
// Macro arguments:
//   max           - full-scale channel value; also the divisor that normalizes
//                   the opacity / transparency weighting
//   temp_type     - arithmetic type used for the intermediate r, g, b values
//   type          - storage type of the output channels
//   chroma_offset - value subtracted from / added back to the 2nd and 3rd
//                   channels; when non-zero, DIVIDE and MULTIPLY keep whichever
//                   of input/output lies farther from it instead of doing
//                   arithmetic on those channels
//
// Names that must exist at the expansion site (they are not macro arguments):
//   mode, input1, input2, input3, output, opacity, transparency
//
// Per mode:
//   TRANSFER_DIVIDE   - input * max / output (max when output is zero), then
//                       mixed with the existing output
//   TRANSFER_MULTIPLY - input * output / max, then mixed with the existing output
//   TRANSFER_SUBTRACT - output - input, then mixed with the existing output
//   TRANSFER_ADDITION - input + output, then mixed with the existing output
//   TRANSFER_REPLACE  - input copied as is; opacity is not applied
//   TRANSFER_NORMAL   - (input * opacity + output * transparency) / max
//
// The result is clipped to [0, max] with CLIP when sizeof(type) != 4 and stored
// unclipped otherwise (presumably the 4 byte case is float - confirm with the
// callers).
// NOTE(review): the switch has no default case, so an unrecognized mode stores
// r, g, b without ever assigning them.
00082 #define BLEND_3(max, temp_type, type, chroma_offset) \
00083 { \
00084         temp_type r, g, b; \
00085  \
00086 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
00087         switch(mode) \
00088         { \
00089                 case TRANSFER_DIVIDE: \
00090                         r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
00091                         if(chroma_offset) \
00092                         { \
00093                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
00094                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
00095                         } \
00096                         else \
00097                         { \
00098                                 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
00099                                 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
00100                         } \
00101                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
00102                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
00103                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
00104                         break; \
00105                 case TRANSFER_MULTIPLY: \
00106                         r = ((temp_type)input1 * output[0]) / max; \
00107                         if(chroma_offset) \
00108                         { \
00109                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
00110                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
00111                         } \
00112                         else \
00113                         { \
00114                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
00115                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
00116                         } \
00117                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
00118                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
00119                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
00120                         break; \
00121                 case TRANSFER_SUBTRACT: \
00122                         r = (temp_type)output[0] - (temp_type)input1; \
00123                         g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
00124                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
00125                                 (temp_type)chroma_offset; \
00126                         b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
00127                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
00128                                 (temp_type)chroma_offset; \
00129                         r = (r * opacity + output[0] * transparency) / max; \
00130                         g = (g * opacity + output[1] * transparency) / max; \
00131                         b = (b * opacity + output[2] * transparency) / max; \
00132                         break; \
00133                 case TRANSFER_ADDITION: \
00134                         r = (temp_type)input1 + output[0]; \
00135                         g = ((temp_type)input2 - chroma_offset) + \
00136                                 ((temp_type)output[1] - chroma_offset) + \
00137                                 (temp_type)chroma_offset; \
00138                         b = ((temp_type)input3 - chroma_offset) + \
00139                                 ((temp_type)output[2] - chroma_offset) + \
00140                                 (temp_type)chroma_offset; \
00141                         r = (r * opacity + output[0] * transparency) / max; \
00142                         g = (g * opacity + output[1] * transparency) / max; \
00143                         b = (b * opacity + output[2] * transparency) / max; \
00144                         break; \
00145                 case TRANSFER_REPLACE: \
00146                         r = input1; \
00147                         g = input2; \
00148                         b = input3; \
00149                         break; \
00150                 case TRANSFER_NORMAL: \
00151                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
00152                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
00153                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
00154                         break; \
00155         } \
00156  \
00157         if(sizeof(type) != 4) \
00158         { \
00159                 output[0] = (type)CLIP(r, 0, max); \
00160                 output[1] = (type)CLIP(g, 0, max); \
00161                 output[2] = (type)CLIP(b, 0, max); \
00162         } \
00163         else \
00164         { \
00165                 output[0] = r; \
00166                 output[1] = g; \
00167                 output[2] = b; \
00168         } \
00169 }
00170 
00171 
00172 
00173 
00174 
00175 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blend one 4-component source pixel (3 color channels + alpha) into
// output[0..3] in place.
//
// Macro arguments:
//   max           - full-scale channel value
//   temp_type     - arithmetic type used for the intermediate values
//   type          - storage type of the output channels
//   chroma_offset - value subtracted from / added back to the 2nd and 3rd
//                   channels; when non-zero, DIVIDE and MULTIPLY keep whichever
//                   of input/output lies farther from it instead of doing
//                   arithmetic on those channels
//
// Names that must exist at the expansion site (they are not macro arguments):
//   mode, input1, input2, input3, input4, output, opacity
//
// The effective source weight is opacity * input4, so it ranges over
// [0, max * max]; that is why every weighted mix is divided by max twice.
// Except for TRANSFER_REPLACE, the stored alpha is the larger of the source and
// destination alpha.  Color channels are clipped to [0, max] with CLIP when
// sizeof(type) != 4 and stored unclipped otherwise.
//
// TRANSFER_SUBTRACT computes destination minus source on all three color
// channels.  The first channel used to be computed as source minus
// destination, which disagreed with the other two channels and with BLEND_3.
#define BLEND_4(max, temp_type, type, chroma_offset) \
{ \
        temp_type r, g, b, a; \
        temp_type pixel_opacity, pixel_transparency; \
        temp_type output1 = output[0]; \
        temp_type output2 = output[1]; \
        temp_type output3 = output[2]; \
        temp_type output4 = output[3]; \
 \
        pixel_opacity = opacity * input4; \
        pixel_transparency = (temp_type)max * max - pixel_opacity; \
 \
        switch(mode) \
        { \
                case TRANSFER_DIVIDE: \
                        r = output1 ? (((temp_type)input1 * max) / output1) : max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                        } \
                        else \
                        { \
                                g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
                                b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
                        } \
                        r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_MULTIPLY: \
                        r = ((temp_type)input1 * output1) / max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                        } \
                        else \
                        { \
                                g = (temp_type)input2 * (temp_type)output2 / max; \
                                b = (temp_type)input3 * (temp_type)output3 / max; \
                        } \
                        r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_SUBTRACT: \
/* Destination minus source, the same direction as g and b below. */ \
                        r = output1 - (temp_type)input1; \
                        g = ((temp_type)output2 - chroma_offset) - \
                                ((temp_type)input2 - (temp_type)chroma_offset) + \
                                (temp_type)chroma_offset; \
                        b = ((temp_type)output3 - chroma_offset) - \
                                ((temp_type)input3 - (temp_type)chroma_offset) + \
                                (temp_type)chroma_offset; \
                        r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_ADDITION: \
                        r = (temp_type)input1 + output1; \
                        g = ((temp_type)input2 - chroma_offset) + \
                                ((temp_type)output2 - chroma_offset) + \
                                chroma_offset; \
                        b = ((temp_type)input3 - chroma_offset) + \
                                ((temp_type)output3 - chroma_offset) + \
                                chroma_offset; \
                        r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_REPLACE: \
/* Straight copy: neither opacity nor the destination alpha is consulted. */ \
                        r = input1; \
                        g = input2; \
                        b = input3; \
                        a = input4; \
                        break; \
                case TRANSFER_NORMAL: \
                        r = (input1 * pixel_opacity + \
                                output1 * pixel_transparency) / max / max; \
                        g = ((input2 - chroma_offset) * pixel_opacity + \
                                (output2 - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        b = ((input3 - chroma_offset) * pixel_opacity + \
                                (output3 - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
        } \
 \
        if(sizeof(type) != 4) \
        { \
                output[0] = (type)CLIP(r, 0, max); \
                output[1] = (type)CLIP(g, 0, max); \
                output[2] = (type)CLIP(b, 0, max); \
                output[3] = (type)a; \
        } \
        else \
        { \
                output[0] = r; \
                output[1] = g; \
                output[2] = b; \
                output[3] = a; \
        } \
}
00286 
00287 
00288 
00289 // Bicubic algorithm using multiprocessors
00290 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
00291 
00292 // Nearest neighbor algorithm using multiprocessors for blending
00293 // input -> scale + translate -> blend -> output
00294 
00295 
00296 int OverlayFrame::overlay(VFrame *output, 
00297         VFrame *input, 
00298         float in_x1, 
00299         float in_y1, 
00300         float in_x2, 
00301         float in_y2, 
00302         float out_x1, 
00303         float out_y1, 
00304         float out_x2, 
00305         float out_y2, 
00306         float alpha,       // 0 - 1
00307         int mode,
00308         int interpolation_type)
00309 {
00310         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
00311         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
00312 
00313 
00314 
00315 
00316 
00317 
00318 
00319 
00320         if(isnan(in_x1) ||
00321                 isnan(in_y1) ||
00322                 isnan(in_x2) ||
00323                 isnan(in_y2) ||
00324                 isnan(out_x1) ||
00325                 isnan(out_y1) ||
00326                 isnan(out_x2) ||
00327                 isnan(out_y2)) return 1;
00328 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
00329 // in_y1,
00330 // in_x2,
00331 // in_y2,
00332 // out_x1,
00333 // out_y1,
00334 // out_x2,
00335 // out_y2,
00336 // out_x2 - out_x1, 
00337 // out_y2 - out_y1);
00338 
00339 // Limit values
00340         if(in_x1 < 0)
00341         {
00342                 out_x1 += -in_x1 * w_scale;
00343                 in_x1 = 0;
00344         }
00345         else
00346         if(in_x1 >= input->get_w())
00347         {
00348                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
00349                 in_x1 = input->get_w();
00350         }
00351 
00352         if(in_y1 < 0)
00353         {
00354                 out_y1 += -in_y1 * h_scale;
00355                 in_y1 = 0;
00356         }
00357         else
00358         if(in_y1 >= input->get_h())
00359         {
00360                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
00361                 in_y1 = input->get_h();
00362         }
00363 
00364         if(in_x2 < 0)
00365         {
00366                 out_x2 += -in_x2 * w_scale;
00367                 in_x2 = 0;
00368         }
00369         else
00370         if(in_x2 >= input->get_w())
00371         {
00372                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
00373                 in_x2 = input->get_w();
00374         }
00375 
00376         if(in_y2 < 0)
00377         {
00378                 out_y2 += -in_y2 * h_scale;
00379                 in_y2 = 0;
00380         }
00381         else
00382         if(in_y2 >= input->get_h())
00383         {
00384                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
00385                 in_y2 = input->get_h();
00386         }
00387 
00388         if(out_x1 < 0)
00389         {
00390                 in_x1 += -out_x1 / w_scale;
00391                 out_x1 = 0;
00392         }
00393         else
00394         if(out_x1 >= output->get_w())
00395         {
00396                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
00397                 out_x1 = output->get_w();
00398         }
00399 
00400         if(out_y1 < 0)
00401         {
00402                 in_y1 += -out_y1 / h_scale;
00403                 out_y1 = 0;
00404         }
00405         else
00406         if(out_y1 >= output->get_h())
00407         {
00408                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
00409                 out_y1 = output->get_h();
00410         }
00411 
00412         if(out_x2 < 0)
00413         {
00414                 in_x2 += -out_x2 / w_scale;
00415                 out_x2 = 0;
00416         }
00417         else
00418         if(out_x2 >= output->get_w())
00419         {
00420                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
00421                 out_x2 = output->get_w();
00422         }
00423 
00424         if(out_y2 < 0)
00425         {
00426                 in_y2 += -out_y2 / h_scale;
00427                 out_y2 = 0;
00428         }
00429         else
00430         if(out_y2 >= output->get_h())
00431         {
00432                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
00433                 out_y2 = output->get_h();
00434         }
00435 
00436 
00437 
00438 
00439 
00440 
00441 
00442 
00443 
00444 
00445         float in_w = in_x2 - in_x1;
00446         float in_h = in_y2 - in_y1;
00447         float out_w = out_x2 - out_x1;
00448         float out_h = out_y2 - out_y1;
00449 // Input for translation operation
00450         VFrame *translation_input = input;
00451 
00452 
00453         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
00454 
00455 
00456 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
00457 //                      in_y1,
00458 //                      in_x2,
00459 //                      in_y2,
00460 //                      out_x1,
00461 //                      out_y1,
00462 //                      out_x2,
00463 //                      out_y2);
00464 
00465 
00466 
00467 
00468 
00469 // ****************************************************************************
00470 // Transfer to temp buffer by scaling nearest integer boundaries
00471 // ****************************************************************************
00472         if(interpolation_type != NEAREST_NEIGHBOR &&
00473                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
00474         {
00475 // Create integer boundaries for interpolation
00476                 int in_x1_int = (int)in_x1;
00477                 int in_y1_int = (int)in_y1;
00478                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
00479                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
00480                 int out_x1_int = (int)out_x1;
00481                 int out_y1_int = (int)out_y1;
00482                 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
00483                 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
00484 
00485 // Dimensions of temp frame.  Integer boundaries scaled.
00486                 int temp_w = (out_x2_int - out_x1_int);
00487                 int temp_h = (out_y2_int - out_y1_int);
00488                 VFrame *scale_output;
00489 
00490 
00491 
00492 #define NO_TRANSLATION1 \
00493         (EQUIV(in_x1, 0) && \
00494         EQUIV(in_y1, 0) && \
00495         EQUIV(out_x1, 0) && \
00496         EQUIV(out_y1, 0) && \
00497         EQUIV(in_x2, in_x2_int) && \
00498         EQUIV(in_y2, in_y2_int) && \
00499         EQUIV(out_x2, temp_w) && \
00500         EQUIV(out_y2, temp_h))
00501 
00502 
00503 #define NO_BLEND \
00504         (EQUIV(alpha, 1) && \
00505         (mode == TRANSFER_REPLACE || \
00506         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
00507 
00508 
00509 
00510 
00511 
00512 // Prepare destination for operation
00513 
00514 // No translation and no blending.  The blending operation is built into the
00515 // translation unit but not the scaling unit.
00516 // input -> output
00517                 if(NO_TRANSLATION1 &&
00518                         NO_BLEND)
00519                 {
00520 // printf("OverlayFrame::overlay input -> output\n");
00521 
00522                         scale_output = output;
00523                         translation_input = 0;
00524                 }
00525                 else
00526 // If translation or blending
00527 // input -> nearest integer boundary temp
00528                 {
00529                         if(temp_frame && 
00530                                 (temp_frame->get_w() != temp_w ||
00531                                         temp_frame->get_h() != temp_h))
00532                         {
00533                                 delete temp_frame;
00534                                 temp_frame = 0;
00535                         }
00536 
00537                         if(!temp_frame)
00538                         {
00539                                 temp_frame = new VFrame(0,
00540                                         temp_w,
00541                                         temp_h,
00542                                         input->get_color_model(),
00543                                         -1);
00544                         }
00545 //printf("OverlayFrame::overlay input -> temp\n");
00546 
00547 
00548                         temp_frame->clear_frame();
00549 
00550 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
00551 //      temp_w, temp_h);
00552                         scale_output = temp_frame;
00553                         translation_input = scale_output;
00554 
00555 // Adjust input coordinates to reflect new scaled coordinates.
00556                         in_x1 = 0;
00557                         in_y1 = 0;
00558                         in_x2 = temp_w;
00559                         in_y2 = temp_h;
00560                 }
00561 
00562 
00563 
00564 //printf("Overlay 1\n");
00565 
00566 // Scale input -> scale_output
00567                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
00568                 scale_engine->scale_output = scale_output;
00569                 scale_engine->scale_input = input;
00570                 scale_engine->w_scale = w_scale;
00571                 scale_engine->h_scale = h_scale;
00572                 scale_engine->in_x1_int = in_x1_int;
00573                 scale_engine->in_y1_int = in_y1_int;
00574                 scale_engine->out_w_int = temp_w;
00575                 scale_engine->out_h_int = temp_h;
00576                 scale_engine->interpolation_type = interpolation_type;
00577 //printf("Overlay 2\n");
00578 
00579 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
00580                 scale_engine->process_packages();
00581 //printf("OverlayFrame::overlay ScaleEngine 2\n");
00582 
00583 
00584 
00585         }
00586 
00587 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
00588 //      in_x1, 
00589 //      in_y1, 
00590 //      in_x2, 
00591 //      in_y2, 
00592 //      out_x1, 
00593 //      out_y1, 
00594 //      out_x2, 
00595 //      out_y2);
00596 
00597 
00598 
00599 
00600 
00601 #define NO_TRANSLATION2 \
00602         (EQUIV(in_x1, 0) && \
00603         EQUIV(in_y1, 0) && \
00604         EQUIV(in_x2, translation_input->get_w()) && \
00605         EQUIV(in_y2, translation_input->get_h()) && \
00606         EQUIV(out_x1, 0) && \
00607         EQUIV(out_y1, 0) && \
00608         EQUIV(out_x2, output->get_w()) && \
00609         EQUIV(out_y2, output->get_h())) \
00610 
00611 #define NO_SCALE \
00612         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
00613         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
00614 
00615         
00616 
00617 
00618 //printf("OverlayFrame::overlay 4 %d\n", mode);
00619 
00620 
00621 
00622 
00623         if(translation_input)
00624         {
00625 // Direct copy
00626                 if( NO_TRANSLATION2 &&
00627                         NO_SCALE &&
00628                         NO_BLEND)
00629                 {
00630 //printf("OverlayFrame::overlay direct copy\n");
00631                         output->copy_from(translation_input);
00632                 }
00633                 else
00634 // Blend only
00635                 if( NO_TRANSLATION2 &&
00636                         NO_SCALE)
00637                 {
00638                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
00639 
00640 
00641                         blend_engine->output = output;
00642                         blend_engine->input = translation_input;
00643                         blend_engine->alpha = alpha;
00644                         blend_engine->mode = mode;
00645 
00646                         blend_engine->process_packages();
00647                 }
00648                 else
00649 // Scale and translate using nearest neighbor
00650 // Translation is exactly on integer boundaries
00651                 if(interpolation_type == NEAREST_NEIGHBOR ||
00652                         EQUIV(in_x1, (int)in_x1) &&
00653                         EQUIV(in_y1, (int)in_y1) &&
00654                         EQUIV(in_x2, (int)in_x2) &&
00655                         EQUIV(in_y2, (int)in_y2) &&
00656 
00657                         EQUIV(out_x1, (int)out_x1) &&
00658                         EQUIV(out_y1, (int)out_y1) &&
00659                         EQUIV(out_x2, (int)out_x2) &&
00660                         EQUIV(out_y2, (int)out_y2))
00661                 {
00662 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
00663                         if(!scaletranslate_engine) scaletranslate_engine = 
00664                                 new ScaleTranslateEngine(this, cpus);
00665 
00666 
00667                         scaletranslate_engine->output = output;
00668                         scaletranslate_engine->input = translation_input;
00669                         scaletranslate_engine->in_x1 = (int)in_x1;
00670                         scaletranslate_engine->in_y1 = (int)in_y1;
00671 // we need to do this mumbo-jumbo in order to get numerical stability
00672 // other option would be to round all the coordinates
00673                         scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
00674                         scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
00675                         scaletranslate_engine->out_x1 = (int)out_x1;
00676                         scaletranslate_engine->out_y1 = (int)out_y1;
00677                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
00678                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
00679                         scaletranslate_engine->alpha = alpha;
00680                         scaletranslate_engine->mode = mode;
00681 
00682                         scaletranslate_engine->process_packages();
00683                 }
00684                 else
00685 // Fractional translation
00686                 {
00687 // Use fractional translation
00688 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
00689 //      in_x1, 
00690 //      in_y1, 
00691 //      in_x2, 
00692 //      in_y2, 
00693 //      out_x1, 
00694 //      out_y1, 
00695 //      out_x2, 
00696 //      out_y2);
00697 
00698 //printf("Overlay 3\n");
00699                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
00700                         translate_engine->translate_output = output;
00701                         translate_engine->translate_input = translation_input;
00702                         translate_engine->translate_in_x1 = in_x1;
00703                         translate_engine->translate_in_y1 = in_y1;
00704                         translate_engine->translate_in_x2 = in_x2;
00705                         translate_engine->translate_in_y2 = in_y2;
00706                         translate_engine->translate_out_x1 = out_x1;
00707                         translate_engine->translate_out_y1 = out_y1;
00708                         translate_engine->translate_out_x2 = out_x2;
00709                         translate_engine->translate_out_y2 = out_y2;
00710                         translate_engine->translate_alpha = alpha;
00711                         translate_engine->translate_mode = mode;
00712 //printf("Overlay 4\n");
00713 
00714 //printf("OverlayFrame::overlay 5 %d\n", mode);
00715                         translate_engine->process_packages();
00716 
00717                 }
00718         }
00719 //printf("OverlayFrame::overlay 2\n");
00720 
00721         return 0;
00722 }
00723 
00724 
00725 
00726 
00727 
00728 
00729 
00730 ScalePackage::ScalePackage()
00731 {
00732 }
00733 
00734 
00735 
00736 
00737 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
00738  : LoadClient(server)
00739 {
00740         this->overlay = overlay;
00741         this->engine = server;
00742 }
00743 
00744 ScaleUnit::~ScaleUnit()
00745 {
00746 }
00747 
00748 
00749 
00750 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
00751         float scale,
00752         int in_pixel1, 
00753         int out_total,
00754         int in_total)
00755 {
00756         table = new bilinear_table_t[out_total];
00757         bzero(table, sizeof(bilinear_table_t) * out_total);
00758 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
00759         for(int i = 0; i < out_total; i++)
00760         {
00761                 float out_start = i;
00762                 float in_start = out_start * scale;
00763                 float out_end = i + 1;
00764                 float in_end = out_end * scale;
00765                 bilinear_table_t *entry = table + i;
00766 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
00767 
00768 // Store input fraction.  Using scale to normalize these didn't work.
00769                 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
00770                 entry->input_fraction2 = 1.0 /* / scale */;
00771                 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
00772 
00773                 if(in_end >= in_total - in_pixel1)
00774                 {
00775                         in_end = in_total - in_pixel1 - 1;
00776                         
00777                         int difference = (int)in_end - (int)in_start - 1;
00778                         if(difference < 0) difference = 0;
00779                         entry->input_fraction3 = 1.0 - 
00780                                 entry->input_fraction1 - 
00781                                 entry->input_fraction2 * difference;
00782                 }
00783 
00784 // Store input pixels
00785                 entry->input_pixel1 = (int)in_start;
00786                 entry->input_pixel2 = (int)in_end;
00787 
00788 // Normalize for middle pixels
00789                 if(entry->input_pixel2 > entry->input_pixel1 + 1)
00790                 {
00791                         float total = entry->input_fraction1 + 
00792                                 entry->input_fraction2 * 
00793                                 (entry->input_pixel2 - entry->input_pixel1 - 1) + 
00794                                 entry->input_fraction3;
00795                         entry->input_fraction1 /= total;
00796                         entry->input_fraction2 /= total;
00797                         entry->input_fraction3 /= total;
00798                 }
00799                 else
00800                 {
00801                         float total = entry->input_fraction1 +
00802                                 entry->input_fraction3;
00803                         entry->input_fraction1 /= total;
00804                         entry->input_fraction3 /= total;
00805                 }
00806 
00807 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n", 
00808 // i,
00809 // entry->input_pixel1, 
00810 // entry->input_pixel2,
00811 // entry->input_fraction1,
00812 // entry->input_fraction2,
00813 // entry->input_fraction3,
00814 // entry->input_fraction1 + 
00815 //      entry->input_fraction2 * 
00816 //      (entry->input_pixel2 - entry->input_pixel1 - 1) + 
00817 //      entry->input_fraction3);
00818 
00819 
00820 // Sanity check
00821                 if(entry->input_pixel1 > entry->input_pixel2)
00822                 {
00823                         entry->input_pixel1 = entry->input_pixel2;
00824                         entry->input_fraction1 = 0;
00825                 }
00826 
00827 // Get total fraction of output pixel used
00828 //              if(entry->input_pixel2 > entry->input_pixel1)
00829                 entry->total_fraction = 
00830                         entry->input_fraction1 +
00831                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
00832                         entry->input_fraction3;
00833                 entry->input_pixel1 += in_pixel1;
00834                 entry->input_pixel2 += in_pixel1;
00835         }
00836 }
00837 
00838 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
00839         float scale,
00840         int in_pixel1, 
00841         int out_total,
00842         int in_total)
00843 {
00844         table = new bilinear_table_t[out_total];
00845         bzero(table, sizeof(bilinear_table_t) * out_total);
00846 
00847         for(int i = 0; i < out_total; i++)
00848         {
00849                 bilinear_table_t *entry = table + i;
00850                 float in_pixel = i * scale;
00851                 entry->input_pixel1 = (int)floor(in_pixel);
00852                 entry->input_pixel2 = entry->input_pixel1 + 1;
00853 
00854                 if(in_pixel <= in_total)
00855                 {
00856                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
00857                 }
00858                 else
00859                 {
00860                         entry->input_fraction3 = 0;
00861                         entry->input_pixel2 = 0;
00862                 }
00863 
00864                 if(in_pixel >= 0)
00865                 {
00866                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
00867                 }
00868                 else
00869                 {
00870                         entry->input_fraction1 = 0;
00871                         entry->input_pixel1 = 0;
00872                 }
00873 
00874                 if(entry->input_pixel2 >= in_total - in_pixel1)
00875                 {
00876                         entry->input_pixel2 = entry->input_pixel1;
00877                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
00878                 }
00879 
00880                 entry->total_fraction = 
00881                         entry->input_fraction1 + 
00882                         entry->input_fraction3;
00883                 entry->input_pixel1 += in_pixel1;
00884                 entry->input_pixel2 += in_pixel1;
00885 // 
00886 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
00887 // entry->input_pixel1,
00888 // entry->input_pixel2,
00889 // entry->input_fraction1,
00890 // entry->input_fraction2,
00891 // entry->input_fraction3);
00892         }
00893 }
00894 
00895 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
00896 {
00897         printf("ScaleUnit::dump_bilinear\n");
00898         for(int i = 0; i < total; i++)
00899         {
00900                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n", 
00901                         i,
00902                         table[i].input_pixel1,
00903                         table[i].input_pixel2,
00904                         table[i].input_fraction1,
00905                         table[i].input_fraction2,
00906                         table[i].input_fraction3,
00907                         table[i].total_fraction);
00908         }
00909 }
00910 
/* Accumulate one input row into temp_f1..temp_f4. */
/* Names taken from the expansion site: in_rows, x_entry, input_scale1..3, */
/* temp_f1..temp_f4.  The first pixel is weighted by input_scale1, the last */
/* by input_scale3 and every pixel strictly between them by input_scale2. */
/* The fourth channel is only touched when components == 4. */
#define PIXEL_REDUCE_MACRO(type, components, row) \
{ \
	type *reduce_line = in_rows[row]; \
	type *reduce_first = reduce_line + x_entry->input_pixel1 * components; \
	type *reduce_last = reduce_line + x_entry->input_pixel2 * components; \
 \
/* Leading pixel */ \
	temp_f1 += input_scale1 * reduce_first[0]; \
	temp_f2 += input_scale1 * reduce_first[1]; \
	temp_f3 += input_scale1 * reduce_first[2]; \
	if(components == 4) temp_f4 += input_scale1 * reduce_first[3]; \
 \
/* Trailing pixel.  Added unconditionally, even when it is the leading pixel */ \
	temp_f1 += input_scale3 * reduce_last[0]; \
	temp_f2 += input_scale3 * reduce_last[1]; \
	temp_f3 += input_scale3 * reduce_last[2]; \
	if(components == 4) temp_f4 += input_scale3 * reduce_last[3]; \
 \
/* Interior pixels */ \
	type *reduce_mid = reduce_first + components; \
	while(reduce_mid < reduce_last) \
	{ \
		temp_f1 += input_scale2 * reduce_mid[0]; \
		temp_f2 += input_scale2 * reduce_mid[1]; \
		temp_f3 += input_scale2 * reduce_mid[2]; \
		if(components == 4) temp_f4 += input_scale2 * reduce_mid[3]; \
		reduce_mid += components; \
	} \
}
00940 
// Bilinear reduction and suboptimal enlargement.
// Very high quality.
//
// Area-weighted scaling of the output rows pkg->out_row1 .. pkg->out_row2 - 1.
// Names taken from the expansion site: pkg, input, output, scale_w, scale_h,
// in_x1_int, in_y1_int, out_w_int, out_h_int (ScaleUnit::process_package
// declares these as "Arguments for macros").
//
// max        - upper clamp for each channel; no clamping when it equals 1.0.
// type       - channel storage type.
// components - channels per pixel; the fourth is processed only when it is 4.
//
// Every contributing input row is accumulated: first row, last row, then all
// rows strictly between them.  The middle pass is bounded only by the table
// entry's own row span, so a package containing a single output row is scaled
// exactly like any other.
#define BILINEAR_REDUCE(max, type, components) \
{ \
	bilinear_table_t *x_table, *y_table; \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
 \
/* Each axis picks its table builder from its own scale factor */ \
	if(scale_w < 1) \
		tabulate_reduction(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
	else \
		tabulate_enlarge(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
 \
	if(scale_h < 1) \
		tabulate_reduction(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
	else \
		tabulate_enlarge(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *out_row = out_rows[i + pkg->out_row1]; \
		bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			bilinear_table_t *x_entry = &x_table[j]; \
/* Load rounding factors: .5 unless the channel type is 4 bytes wide */ \
			float temp_f1; \
			float temp_f2; \
			float temp_f3; \
			float temp_f4; \
			if(sizeof(type) != 4) \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
			else \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
 \
/* First row */ \
			float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
			float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
			float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 \
/* Last row */ \
			input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 \
/* Middle rows.  The loop is empty when first and last row are adjacent or */ \
/* identical, so no extra guard is needed. */ \
			input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
			for(int k = y_entry->input_pixel1 + 1; \
				k < y_entry->input_pixel2; \
				k++) \
			{ \
				PIXEL_REDUCE_MACRO(type, components, k) \
			} \
 \
/* Clamp to the channel maximum */ \
			if(max != 1.0) \
			{ \
				if(temp_f1 > max) temp_f1 = max; \
				if(temp_f2 > max) temp_f2 = max; \
				if(temp_f3 > max) temp_f3 = max; \
				if(components == 4) if(temp_f4 > max) temp_f4 = max; \
			} \
 \
			out_row[j * components    ] = (type)temp_f1; \
			out_row[j * components + 1] = (type)temp_f2; \
			out_row[j * components + 2] = (type)temp_f3; \
			if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
		} \
	} \
 \
	delete [] x_table; \
	delete [] y_table; \
}
01051 
01052 
01053 
// Only 2 input pixels per axis: each output pixel blends a 2x2 neighbourhood.
//
// Names taken from the expansion site: pkg, input, output, scale_w, scale_h,
// in_x1_int, in_y1_int, out_w_int.  Covers output rows pkg->out_row1 ..
// pkg->out_row2 - 1.  The max argument is not used and results are converted
// to the channel type without rounding.  The fourth channel is processed only
// when components == 4.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	const float step_y = 1.0 / scale_h; \
	const float step_x = 1.0 / scale_w; \
	type **src_rows = (type**)input->get_rows(); \
	type **dst_rows = (type**)output->get_rows(); \
	const int row_count = pkg->out_row2 - pkg->out_row1; \
	const int src_h = input->get_h(); \
	const int src_w = input->get_w(); \
	int *left_col, *right_col, *upper_row, *lower_row; \
	float *frac_x, *antifrac_x, *frac_y, *antifrac_y; \
 \
/* Columns are tabulated for the full output width, rows for this package */ \
	tabulate_blinear_f(left_col, right_col, frac_x, antifrac_x, \
		step_x, 0, out_w_int, in_x1_int, src_w); \
	tabulate_blinear_f(upper_row, lower_row, frac_y, antifrac_y, \
		step_y, pkg->out_row1, pkg->out_row2, in_y1_int, src_h); \
 \
	for(int row = 0; row < row_count; row++) \
	{ \
		const float wy = frac_y[row]; \
		const float anti_wy = antifrac_y[row]; \
		type *upper = src_rows[upper_row[row]]; \
		type *lower = src_rows[lower_row[row]]; \
		type *dst = dst_rows[row + pkg->out_row1]; \
 \
		for(int col = 0; col < out_w_int; col++, dst += components) \
		{ \
			const float wx = frac_x[col]; \
			const float anti_wx = antifrac_x[col]; \
/* Corners: 1 = upper left, 2 = upper right, 3 = lower left, 4 = lower right */ \
			type *p1 = upper + left_col[col] * components; \
			type *p2 = upper + right_col[col] * components; \
			type *p3 = lower + left_col[col] * components; \
			type *p4 = lower + right_col[col] * components; \
			const float r1 = p1[0], g1 = p1[1], b1 = p1[2]; \
			const float r2 = p2[0], g2 = p2[1], b2 = p2[2]; \
			const float r3 = p3[0], g3 = p3[1], b3 = p3[2]; \
			const float r4 = p4[0], g4 = p4[1], b4 = p4[2]; \
 \
			dst[0] = (type)(anti_wy * (anti_wx * r1 + wx * r2) + \
				wy * (anti_wx * r3 + wx * r4)); \
			dst[1] = (type)(anti_wy * (anti_wx * g1 + wx * g2) + \
				wy * (anti_wx * g3 + wx * g4)); \
			dst[2] = (type)(anti_wy * (anti_wx * b1 + wx * b2) + \
				wy * (anti_wx * b3 + wx * b4)); \
			if(components == 4) \
			{ \
				const float a1 = p1[3]; \
				const float a2 = p2[3]; \
				const float a3 = p3[3]; \
				const float a4 = p4[3]; \
				dst[3] = (type)(anti_wy * (anti_wx * a1 + wx * a2) + \
					wy * (anti_wx * a3 + wx * a4)); \
			} \
		} \
	} \
 \
	delete [] left_col; \
	delete [] right_col; \
	delete [] upper_row; \
	delete [] lower_row; \
	delete [] frac_x; \
	delete [] antifrac_x; \
	delete [] frac_y; \
	delete [] antifrac_y; \
}
01172 
01173 
/* Bicubic scaling: every output pixel is a weighted sum over a 4x4 block of */
/* input pixels, using the coefficient and coordinate tables produced by */
/* tabulate_bcubic_f (four consecutive slots per output pixel). */
/* Names taken from the expansion site: pkg, input, output, scale_w, scale_h, */
/* in_x1_int, in_y1_int, out_w_int, out_h_int.  The max argument is not used */
/* and results are converted to the channel type without rounding. */
#define BICUBIC(max, type, components) \
{ \
	const float step_y = 1.0 / scale_h; \
	const float step_x = 1.0 / scale_w; \
	type **src_rows = (type**)input->get_rows(); \
	type **dst_rows = (type**)output->get_rows(); \
	float *weight_x, *weight_y; \
	int *source_x, *source_y; \
	const int src_h = input->get_h(); \
	const int src_w = input->get_w(); \
 \
	tabulate_bcubic_f(weight_x, source_x, step_x, in_x1_int, out_w_int, src_w, -1); \
	tabulate_bcubic_f(weight_y, source_y, step_y, in_y1_int, out_h_int, src_h, 1); \
 \
	for(int out_y = pkg->out_row1; out_y < pkg->out_row2; out_y++) \
	{ \
		type *dst = dst_rows[out_y]; \
		const float *taps_y = weight_y + out_y * 4; \
		const int *rows_y = source_y + out_y * 4; \
 \
		for(int out_x = 0; out_x < out_w_int; out_x++) \
		{ \
			const float *taps_x = weight_x + out_x * 4; \
			const int *cols_x = source_x + out_x * 4; \
			float sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0; \
 \
/* Kernel: rows outermost, columns innermost */ \
			for(int tap_y = 0; tap_y < 4; tap_y++) \
			{ \
				type *src = src_rows[rows_y[tap_y]]; \
				const float wy = taps_y[tap_y]; \
 \
				for(int tap_x = 0; tap_x < 4; tap_x++) \
				{ \
					type *pixel = src + cols_x[tap_x] * components; \
					const float w = wy * taps_x[tap_x]; \
					sum1 += w * pixel[0]; \
					sum2 += w * pixel[1]; \
					sum3 += w * pixel[2]; \
					if(components == 4) \
						sum4 += w * pixel[3]; \
				} \
			} \
 \
			dst[out_x * components] = (type)sum1; \
			dst[out_x * components + 1] = (type)sum2; \
			dst[out_x * components + 2] = (type)sum3; \
			if(components == 4) \
				dst[out_x * components + 3] = (type)sum4; \
		} \
	} \
 \
	delete [] weight_x; \
	delete [] weight_y; \
	delete [] source_x; \
	delete [] source_y; \
}
01260 
01261 
01262 
01263 
01264 // Pow function is not thread safe in Compaqt C
01265 #define CUBE(x) ((x) * (x) * (x))
01266 
01267 float ScaleUnit::cubic_bspline(float x)
01268 {
01269         float a, b, c, d;
01270 
01271         if((x + 2.0F) <= 0.0F) 
01272         {
01273         a = 0.0F;
01274         }
01275         else 
01276         {
01277         a = CUBE(x + 2.0F);
01278         }
01279 
01280 
01281         if((x + 1.0F) <= 0.0F) 
01282         {
01283         b = 0.0F;
01284         }
01285         else 
01286         {
01287         b = CUBE(x + 1.0F);
01288         }    
01289 
01290         if(x <= 0) 
01291         {
01292         c = 0.0F;
01293         }
01294         else 
01295         {
01296         c = CUBE(x);
01297         }  
01298 
01299         if((x - 1.0F) <= 0.0F) 
01300         {
01301         d = 0.0F;
01302         }
01303         else 
01304         {
01305         d = CUBE(x - 1.0F);
01306         }
01307 
01308 
01309         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
01310 }
01311 
01312 
01313 void ScaleUnit::tabulate_bcubic_f(float* &coef_table, 
01314         int* &coord_table,
01315         float scale,
01316         int start, 
01317         int pixels,
01318         int total_pixels,
01319         float coefficient)
01320 {
01321         coef_table = new float[pixels * 4];
01322         coord_table = new int[pixels * 4];
01323         for(int i = 0, j = 0; i < pixels; i++)
01324         {
01325                 float f_x = (float)i * scale;
01326                 float a = f_x - floor(f_x);
01327                 
01328                 for(float m = -1; m < 3; m++)
01329                 {
01330                         coef_table[j] = cubic_bspline(coefficient * (m - a));
01331                         coord_table[j] = (int)(start + (int)f_x + m);
01332                         CLAMP(coord_table[j], 0, total_pixels - 1);
01333                         j++;
01334                 }
01335                 
01336         }
01337 }
01338 
01339 void ScaleUnit::tabulate_bcubic_i(int* &coef_table, 
01340         int* &coord_table,
01341         float scale,
01342         int start, 
01343         int pixels,
01344         int total_pixels,
01345         float coefficient)
01346 {
01347         coef_table = new int[pixels * 4];
01348         coord_table = new int[pixels * 4];
01349         for(int i = 0, j = 0; i < pixels; i++)
01350         {
01351                 float f_x = (float)i * scale;
01352                 float a = f_x - floor(f_x);
01353                 
01354                 for(float m = -1; m < 3; m++)
01355                 {
01356                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
01357                         coord_table[j] = (int)(start + (int)f_x + m);
01358                         CLAMP(coord_table[j], 0, total_pixels - 1);
01359                         j++;
01360                 }
01361                 
01362         }
01363 }
01364 
01365 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
01366                 int* &table_int2,
01367                 float* &table_frac,
01368                 float* &table_antifrac,
01369                 float scale,
01370                 int pixel1,
01371                 int pixel2,
01372                 int start,
01373                 int total_pixels)
01374 {
01375         table_int1 = new int[pixel2 - pixel1];
01376         table_int2 = new int[pixel2 - pixel1];
01377         table_frac = new float[pixel2 - pixel1];
01378         table_antifrac = new float[pixel2 - pixel1];
01379 
01380         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
01381         {
01382                 float f_x = (float)i * scale;
01383                 int i_x = (int)floor(f_x);
01384                 float a = (f_x - floor(f_x));
01385 
01386                 table_int1[j] = i_x + start;
01387                 table_int2[j] = i_x + start + 1;
01388                 CLAMP(table_int1[j], 0, total_pixels - 1);
01389                 CLAMP(table_int2[j], 0, total_pixels - 1);
01390                 table_frac[j] = a;
01391                 table_antifrac[j] = 1.0F - a;
01392 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
01393         }
01394 }
01395 
01396 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
01397                 int* &table_int2,
01398                 int* &table_frac,
01399                 int* &table_antifrac,
01400                 float scale,
01401                 int pixel1,
01402                 int pixel2,
01403                 int start,
01404                 int total_pixels)
01405 {
01406         table_int1 = new int[pixel2 - pixel1];
01407         table_int2 = new int[pixel2 - pixel1];
01408         table_frac = new int[pixel2 - pixel1];
01409         table_antifrac = new int[pixel2 - pixel1];
01410 
01411         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
01412         {
01413                 double f_x = (float)i * scale;
01414                 int i_x = (int)floor(f_x);
01415                 float a = (f_x - floor(f_x));
01416 
01417                 table_int1[j] = i_x + start;
01418                 table_int2[j] = i_x + start + 1;
01419                 CLAMP(table_int1[j], 0, total_pixels - 1);
01420                 CLAMP(table_int2[j], 0, total_pixels - 1);
01421                 table_frac[j] = (int)(a * 0xffff);
01422                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
01423 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
01424         }
01425 }
01426 
01427 void ScaleUnit::process_package(LoadPackage *package)
01428 {
01429         ScalePackage *pkg = (ScalePackage*)package;
01430 
01431 //printf("ScaleUnit::process_package 1\n");
01432 // Arguments for macros
01433         VFrame *output = engine->scale_output;
01434         VFrame *input = engine->scale_input;
01435         float scale_w = engine->w_scale;
01436         float scale_h = engine->h_scale;
01437         int in_x1_int = engine->in_x1_int;
01438         int in_y1_int = engine->in_y1_int;
01439         int out_h_int = engine->out_h_int;
01440         int out_w_int = engine->out_w_int;
01441         int do_yuv = 
01442                 (input->get_color_model() == BC_YUV888 ||
01443                 input->get_color_model() == BC_YUVA8888 ||
01444                 input->get_color_model() == BC_YUV161616 ||
01445                 input->get_color_model() == BC_YUVA16161616);
01446 
01447 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
01448         if(engine->interpolation_type == CUBIC_CUBIC || 
01449                 (engine->interpolation_type == CUBIC_LINEAR 
01450                         && engine->w_scale > 1 && 
01451                         engine->h_scale