• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files

hvirtual/mpeg2enc/motion.c

Go to the documentation of this file.
00001 /* motion.c, motion estimation                                              */
00002 
00003 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
00004 
00005 /*
00006  * Disclaimer of Warranty
00007  *
00008  * These software programs are available to the user without any license fee or
00009  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
00010  * any and all warranties, whether express, implied, or statuary, including any
00011  * implied warranties or merchantability or of fitness for a particular
00012  * purpose.  In no event shall the copyright-holder be liable for any
00013  * incidental, punitive, or consequential damages of any kind whatsoever
00014  * arising from the use of these programs.
00015  *
00016  * This disclaimer of warranty extends to the user of these programs and user's
00017  * customers, employees, agents, transferees, successors, and assigns.
00018  *
00019  * The MPEG Software Simulation Group does not represent or warrant that the
00020  * programs furnished hereunder are free of infringement of any third-party
00021  * patents.
00022  *
00023  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
00024  * are subject to royalty fees to patent holders.  Many of these patents are
00025  * general enough such that they are unavoidable regardless of implementation
00026  * design.
00027  *
00028  */
00029 
00030 #include <limits.h>
00031 #include <stdio.h>
00032 #include "config.h"
00033 #include "global.h"
00034 #include "cpu_accel.h"
00035 #include "simd.h"
00036 
00037 /* Macro-block Motion compensation results record */
00038 
00039 typedef struct _blockcrd {
00040         int16_t x;
00041         int16_t y;
00042 } blockxy;
00043 
00044 struct mb_motion
00045 {
00046         blockxy pos;        // Half-pel co-ordinates of source block
00047         int sad;                        // Sum of absolute difference
00048         int var;
00049         uint8_t *blk ;          // Source block data (in luminace data array)
00050         int hx, hy;                     // Half-pel offsets
00051         int fieldsel;           // 0 = top 1 = bottom
00052         int fieldoff;       // Offset from start of frame data to first line
00053                                                 // of field.    (top = 0, bottom = width );
00054 };
00055 
00056 typedef struct mb_motion mb_motion_s;
00057 
00058 
00059 struct subsampled_mb
00060 {
00061         uint8_t *mb;            // One pel
00062         uint8_t *fmb;           // Two-pel subsampled
00063         uint8_t *qmb;           // Four-pel subsampled
00064         uint8_t *umb;           // U compoenent one-pel
00065         uint8_t *vmb;       // V component  one-pel
00066 };
00067 
00068 typedef struct subsampled_mb subsampled_mb_s;
00069 
00070 
00071 static void frame_ME (motion_engine_t *engine,
00072                                                                         pict_data_s *picture,
00073                                                                   motion_comp_s *mc,
00074                                                                   int mboffset,
00075                                                                   int i, int j, struct mbinfo *mbi);
00076 
00077 static void field_ME (motion_engine_t *engine,
00078                                                                         pict_data_s *picture,
00079                                                                   motion_comp_s *mc,
00080                                                                   int mboffset,
00081                                                                   int i, int j, 
00082                                                                   struct mbinfo *mbi, 
00083                                                                   int secondfield, 
00084                                                                   int ipflag);
00085 
00086 static void frame_estimate (motion_engine_t *engine,
00087         uint8_t *org,
00088          uint8_t *ref, 
00089          subsampled_mb_s *ssmb,
00090          int i, int j,
00091          int sx, int sy, 
00092           mb_motion_s *bestfr,
00093           mb_motion_s *besttop,
00094           mb_motion_s *bestbot,
00095          int imins[2][2], int jmins[2][2]);
00096 
00097 static void field_estimate (motion_engine_t *engine,
00098                                                         pict_data_s *picture,
00099                                                         uint8_t *toporg,
00100                                                         uint8_t *topref, 
00101                                                         uint8_t *botorg, 
00102                                                         uint8_t *botref,
00103                                                         subsampled_mb_s *ssmb,
00104                                                         int i, int j, int sx, int sy, int ipflag,
00105                                                         mb_motion_s *bestfr,
00106                                                         mb_motion_s *best8u,
00107                                                         mb_motion_s *best8l,
00108                                                         mb_motion_s *bestsp);
00109 
00110 static void dpframe_estimate (motion_engine_t *engine,
00111         pict_data_s *picture,
00112         uint8_t *ref,
00113         subsampled_mb_s *ssmb,
00114         int i, int j, int iminf[2][2], int jminf[2][2],
00115         mb_motion_s *dpbest,
00116         int *imindmvp, int *jmindmvp, 
00117         int *vmcp);
00118 
00119 static void dpfield_estimate (motion_engine_t *engine,
00120         pict_data_s *picture,
00121         uint8_t *topref,
00122         uint8_t *botref, 
00123         uint8_t *mb,
00124         int i, int j, 
00125         int imins, int jmins, 
00126         mb_motion_s *dpbest,
00127         int *vmcp);
00128 
00129 static void fullsearch (motion_engine_t *engine, 
00130         uint8_t *org, uint8_t *ref,
00131         subsampled_mb_s *ssblk,
00132         int lx, int i0, int j0, 
00133         int sx, int sy, int h, 
00134         int xmax, int ymax,
00135         mb_motion_s *motion );
00136 
00137 static void find_best_one_pel( motion_engine_t *engine, 
00138                                                                 uint8_t *org, uint8_t *blk,
00139                                                            int searched_size,
00140                                                            int i0, int j0,
00141                                                            int ilow, int jlow,
00142                                                            int xmax, int ymax,
00143                                                            int lx, int h, 
00144                                                            mb_motion_s *res
00145         );
00146 static int build_sub22_mcomps( motion_engine_t *engine, 
00147                                                                 int i0,  int j0, int ihigh, int jhigh, 
00148                                                                 int null_mc_sad,
00149                                                                 uint8_t *s22org,  uint8_t *s22blk, 
00150                                                            int flx, int fh,  int searched_sub44_size );
00151 
00152 #ifdef X86_CPU
00153 static void find_best_one_pel_mmxe( motion_engine_t *engine, 
00154                                                                 uint8_t *org, uint8_t *blk,
00155                                                            int searched_size,
00156                                                            int i0, int j0,
00157                                                            int ilow, int jlow,
00158                                                            int xmax, int ymax,
00159                                                            int lx, int h, 
00160                                                            mb_motion_s *res
00161         );
00162 
00163 static int build_sub22_mcomps_mmxe( motion_engine_t *engine, int i0,  int j0, int ihigh, int jhigh, 
00164                                                          int null_mc_sad,
00165                                                          uint8_t *s22org,  uint8_t *s22blk, 
00166                                                          int flx, int fh,  int searched_sub44_size );
00167 static int (*pmblock_sub44_dists)( uint8_t *blk,  uint8_t *ref,
00168                                                         int ilow, int jlow,
00169                                                         int ihigh, int jhigh, 
00170                                                         int h, int rowstride, 
00171                                                         int threshold,
00172                                                         mc_result_s *resvec);
00173 #endif
00174 
00175 static int unidir_pred_var( const mb_motion_s *motion, 
00176                                                         uint8_t *mb,  int lx, int h);
00177 static int bidir_pred_var( const mb_motion_s *motion_f,  
00178                                                    const mb_motion_s *motion_b, 
00179                                                    uint8_t *mb,  int lx, int h);
00180 static int bidir_pred_sad( const mb_motion_s *motion_f,  
00181                                                    const mb_motion_s *motion_b, 
00182                                                    uint8_t *mb,  int lx, int h);
00183 
00184 static int variance(  uint8_t *mb, int size, int lx);
00185 
00186 static int dist22 ( uint8_t *blk1, uint8_t *blk2,  int qlx, int qh);
00187 
00188 static int dist44 ( uint8_t *blk1, uint8_t *blk2,  int flx, int fh);
00189 static int dist2_22( uint8_t *blk1, uint8_t *blk2,
00190                                          int lx, int h);
00191 static int bdist2_22( uint8_t *blk1f, uint8_t *blk1b, 
00192                                           uint8_t *blk2,
00193                                           int lx, int h);
00194 
00195 
00196 static void (*pfind_best_one_pel)( motion_engine_t *engine,
00197                                                                 uint8_t *org, uint8_t *blk,
00198                                                            int searched_size,
00199                                                            int i0, int j0,
00200                                                            int ilow, int jlow,
00201                                                            int xmax, int ymax,
00202                                                            int lx, int h, 
00203                                                            mb_motion_s *res
00204         );
00205 static int (*pbuild_sub22_mcomps)( motion_engine_t *engine,
00206                                                                         int i0,  int j0, int ihigh, int jhigh, 
00207                                                                    int null_mc_sad,
00208                                                                    uint8_t *s22org,  uint8_t *s22blk, 
00209                                                                    int flx, int fh,  int searched_sub44_size );
00210 
00211 static int (*pdist2_22)( uint8_t *blk1, uint8_t *blk2,
00212                                                  int lx, int h);
00213 static int (*pbdist2_22)( uint8_t *blk1f, uint8_t *blk1b, 
00214                                                   uint8_t *blk2,
00215                                                   int lx, int h);
00216 
00217 static int dist1_00( uint8_t *blk1, uint8_t *blk2,  int lx, int h, int distlim);
00218 static int dist1_01(uint8_t *blk1, uint8_t *blk2, int lx, int h);
00219 static int dist1_10(uint8_t *blk1, uint8_t *blk2, int lx, int h);
00220 static int dist1_11(uint8_t *blk1, uint8_t *blk2, int lx, int h);
00221 static int dist2 (uint8_t *blk1, uint8_t *blk2,
00222                                                           int lx, int hx, int hy, int h);
00223 static int bdist2 (uint8_t *pf, uint8_t *pb,
00224         uint8_t *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h);
00225 static int bdist1 (uint8_t *pf, uint8_t *pb,
00226                                    uint8_t *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h);
00227 
00228 
00229 static int (*pdist22) ( uint8_t *blk1, uint8_t *blk2,  int flx, int fh);
00230 static int (*pdist44) ( uint8_t *blk1, uint8_t *blk2,  int qlx, int qh);
00231 static int (*pdist1_00) ( uint8_t *blk1, uint8_t *blk2,  int lx, int h, int distlim);
00232 static int (*pdist1_01) ( uint8_t *blk1, uint8_t *blk2, int lx, int h);
00233 static int (*pdist1_10) ( uint8_t *blk1, uint8_t *blk2, int lx, int h);
00234 static int (*pdist1_11) ( uint8_t *blk1, uint8_t *blk2, int lx, int h);
00235 
00236 static int (*pdist2) (uint8_t *blk1, uint8_t *blk2,
00237                                           int lx, int hx, int hy, int h);
00238   
00239   
00240 static int (*pbdist2) (uint8_t *pf, uint8_t *pb,
00241                                            uint8_t *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h);
00242 
00243 static int (*pbdist1) (uint8_t *pf, uint8_t *pb,
00244                                            uint8_t *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h);
00245 
00246 
00247 /* DEBUGGER...
00248 static void check_mc( const mb_motion_s *motion, int rx, int ry, int i, int j, char *str )
00249 {
00250         rx *= 2; ry *= 2;
00251         if( motion->sad < 0 || motion->sad > 0x10000 )
00252         {
00253                 printf( "SAD ooops %s\n", str );
00254                 exit(1);
00255         }
00256         if( motion->pos.x-i*2 < -rx || motion->pos.x-i*2 >= rx )
00257         {
00258                 printf( "X MC ooops %s = %d [%d]\n", str, motion->pos.x-i*2,rx );
00259                 exit(1);
00260         }
00261         if( motion->pos.y-j*2 < -ry || motion->pos.y-j*2 >= ry )
00262         {
00263                 printf( "Y MC ooops %s %d [%d]\n", str, motion->pos.y-j*2, ry );
00264                 exit(1);
00265         }
00266 }
00267 
00268 static void init_mc( mb_motion_s *motion )
00269 {
00270         motion->sad = -123;
00271         motion->pos.x = -1000;
00272         motion->pos.y = -1000;
00273 }
00274 */
00275 
00276 /* unidir_NI_var_sum
00277    Compute the combined variance of luminance and chrominance information
00278    for a particular non-intra macro block after unidirectional
00279    motion compensation...  
00280 
00281    Note: results are scaled to give chrominance equal weight to
00282    chrominance.  The variance of the luminance portion is computed
00283    at the time the motion compensation is computed.
00284 
00285    TODO: Perhaps we should compute the whole thing in fullsearch not
00286    seperate it.  However, that would involve a lot of fiddling with
00287    field_* and until its thoroughly debugged and tested I think I'll
00288    leave that alone. Furthermore, it is unclear if its really worth
00289    doing these computations for B *and* P frames.
00290 
00291    TODO: BUG: ONLY works for 420 video...
00292 
00293 */
00294 
00295 static int unidir_chrom_var_sum( mb_motion_s *lum_mc, 
00296                                                           uint8_t **ref, 
00297                                                           subsampled_mb_s *ssblk,
00298                                                           int lx, int h )
00299 {
00300         int uvlx = (lx>>1);
00301         int uvh = (h>>1);
00302         /* N.b. MC co-ordinates are computed in half-pel units! */
00303         int cblkoffset = (lum_mc->fieldoff>>1) +
00304                 (lum_mc->pos.x>>2) + (lum_mc->pos.y>>2)*uvlx;
00305         
00306         return  ((*pdist2_22)( ref[1] + cblkoffset, ssblk->umb, uvlx, uvh) +
00307                          (*pdist2_22)( ref[2] + cblkoffset, ssblk->vmb, uvlx, uvh))*2;
00308 }
00309 
00310 /*
00311   bidir_NI_var_sum
00312    Compute the combined variance of luminance and chrominance information
00313    for a particular non-intra macro block after unidirectional
00314    motion compensation...  
00315 
00316    Note: results are scaled to give chrominance equal weight to
00317    chrominance.  The variance of the luminance portion is computed
00318    at the time the motion compensation is computed.
00319 
00320    Note: results scaled to give chrominance equal weight to chrominance.
00321   
00322   TODO: BUG: ONLY works for 420 video...
00323 
00324   NOTE: Currently unused but may be required if it turns out that taking
00325   chrominance into account in B frames is needed.
00326 
00327  */
00328 
00329 int bidir_chrom_var_sum( mb_motion_s *lum_mc_f, 
00330                                            mb_motion_s *lum_mc_b, 
00331                                            uint8_t **ref_f, 
00332                                            uint8_t **ref_b,
00333                                            subsampled_mb_s *ssblk,
00334                                            int lx, int h )
00335 {
00336         int uvlx = (lx>>1);
00337         int uvh = (h>>1);
00338         /* N.b. MC co-ordinates are computed in half-pel units! */
00339         int cblkoffset_f = (lum_mc_f->fieldoff>>1) + 
00340                 (lum_mc_f->pos.x>>2) + (lum_mc_f->pos.y>>2)*uvlx;
00341         int cblkoffset_b = (lum_mc_b->fieldoff>>1) + 
00342                 (lum_mc_b->pos.x>>2) + (lum_mc_f->pos.y>>2)*uvlx;
00343         
00344         return  (
00345                 (*pbdist2_22)( ref_f[1] + cblkoffset_f, ref_b[1] + cblkoffset_b,
00346                                            ssblk->umb, uvlx, uvh ) +
00347                 (*pbdist2_22)( ref_f[2] + cblkoffset_f, ref_b[2] + cblkoffset_b,
00348                                            ssblk->vmb, uvlx, uvh ))*2;
00349 
00350 }
00351 
00352 static int chrom_var_sum( subsampled_mb_s *ssblk, int h, int lx )
00353 {
00354         return (variance(ssblk->umb,(h>>1),(lx>>1)) + 
00355                         variance(ssblk->vmb,(h>>1),(lx>>1))) * 2;
00356 }
00357 
00358 /*
00359  * frame picture motion estimation
00360  *
00361  * org: top left pel of source reference frame
00362  * ref: top left pel of reconstructed reference frame
00363  * ssmb:  macroblock to be matched
00364  * i,j: location of mb relative to ref (=center of search window)
00365  * sx,sy: half widths of search window
00366  * besfr: location and SAD of best frame prediction
00367  * besttop: location of best field pred. for top field of mb
00368  * bestbo : location of best field pred. for bottom field of mb
00369  */
00370 
00371 static void frame_estimate(motion_engine_t *engine, 
00372         uint8_t *org,
00373         uint8_t *ref,
00374         subsampled_mb_s *ssmb,
00375         int i, int j, int sx, int sy,
00376         mb_motion_s *bestfr,
00377         mb_motion_s *besttop,
00378         mb_motion_s *bestbot,
00379         int imins[2][2],
00380         int jmins[2][2]
00381         )
00382 {
00383         subsampled_mb_s  botssmb;
00384         mb_motion_s topfld_mc;
00385         mb_motion_s botfld_mc;
00386 
00387         botssmb.mb = ssmb->mb+width;
00388         botssmb.fmb = ssmb->mb+(width>>1);
00389         botssmb.qmb = ssmb->qmb+(width>>2);
00390         botssmb.umb = ssmb->umb+(width>>1);
00391         botssmb.vmb = ssmb->vmb+(width>>1);
00392 
00393         /* frame prediction */
00394         fullsearch(engine, org,ref,ssmb,width,i,j,sx,sy,16,width,height,
00395                                                   bestfr );
00396         bestfr->fieldsel = 0;
00397         bestfr->fieldoff = 0;
00398 
00399         /* predict top field from top field */
00400         fullsearch(engine, org,ref,ssmb,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
00401                            &topfld_mc);
00402 
00403         /* predict top field from bottom field */
00404         fullsearch(engine, org+width,ref+width,ssmb, width<<1,i,j>>1,sx,sy>>1,8,
00405                            width,height>>1, &botfld_mc);
00406 
00407         /* set correct field selectors... */
00408         topfld_mc.fieldsel = 0;
00409         botfld_mc.fieldsel = 1;
00410         topfld_mc.fieldoff = 0;
00411         botfld_mc.fieldoff = width;
00412 
00413         imins[0][0] = topfld_mc.pos.x;
00414         jmins[0][0] = topfld_mc.pos.y;
00415         imins[1][0] = botfld_mc.pos.x;
00416         jmins[1][0] = botfld_mc.pos.y;
00417 
00418         /* select prediction for top field */
00419         if (topfld_mc.sad<=botfld_mc.sad)
00420         {
00421                 *besttop = topfld_mc;
00422         }
00423         else
00424         {
00425                 *besttop = botfld_mc;
00426         }
00427 
00428         /* predict bottom field from top field */
00429         fullsearch(engine, org,ref,&botssmb,
00430                                         width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
00431                                         &topfld_mc);
00432 
00433         /* predict bottom field from bottom field */
00434         fullsearch(engine, org+width,ref+width,&botssmb,
00435                                         width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
00436                                         &botfld_mc);
00437 
00438         /* set correct field selectors... */
00439         topfld_mc.fieldsel = 0;
00440         botfld_mc.fieldsel = 1;
00441         topfld_mc.fieldoff = 0;
00442         botfld_mc.fieldoff = width;
00443 
00444         imins[0][1] = topfld_mc.pos.x;
00445         jmins[0][1] = topfld_mc.pos.y;
00446         imins[1][1] = botfld_mc.pos.x;
00447         jmins[1][1] = botfld_mc.pos.y;
00448 
00449         /* select prediction for bottom field */
00450         if (botfld_mc.sad<=topfld_mc.sad)
00451         {
00452                 *bestbot = botfld_mc;
00453         }
00454         else
00455         {
00456                 *bestbot = topfld_mc;
00457         }
00458 }
00459 
00460 /*
00461  * field picture motion estimation subroutine
00462  *
00463  * toporg: address of original top reference field
00464  * topref: address of reconstructed top reference field
00465  * botorg: address of original bottom reference field
00466  * botref: address of reconstructed bottom reference field
00467  * ssmmb:  macroblock to be matched
00468  * i,j: location of mb (=center of search window)
00469  * sx,sy: half width/height of search window
00470  *
00471  * bestfld: location and distance of best field prediction
00472  * best8u: location of best 16x8 pred. for upper half of mb
00473  * best8lp: location of best 16x8 pred. for lower half of mb
00474  * bdestsp: location and distance of best same parity field
00475  *                    prediction (needed for dual prime, only valid if
00476  *                    ipflag==0)
00477  */
00478 
00479 static void field_estimate (motion_engine_t *engine,
00480         pict_data_s *picture,
00481         uint8_t *toporg,
00482         uint8_t *topref, 
00483         uint8_t *botorg, 
00484         uint8_t *botref,
00485         subsampled_mb_s *ssmb,
00486         int i, int j, int sx, int sy, int ipflag,
00487         mb_motion_s *bestfld,
00488         mb_motion_s *best8u,
00489         mb_motion_s *best8l,
00490         mb_motion_s *bestsp)
00491 
00492 {
00493         mb_motion_s topfld_mc;
00494         mb_motion_s botfld_mc;
00495         int dt, db;
00496         int notop, nobot;
00497         subsampled_mb_s botssmb;
00498 
00499         botssmb.mb = ssmb->mb+width;
00500         botssmb.umb = ssmb->umb+(width>>1);
00501         botssmb.vmb = ssmb->vmb+(width>>1);
00502         botssmb.fmb = ssmb->fmb+(width>>1);
00503         botssmb.qmb = ssmb->qmb+(width>>2);
00504 
00505         /* if ipflag is set, predict from field of opposite parity only */
00506         notop = ipflag && (picture->pict_struct==TOP_FIELD);
00507         nobot = ipflag && (picture->pict_struct==BOTTOM_FIELD);
00508 
00509         /* field prediction */
00510 
00511         /* predict current field from top field */
00512         if (notop)
00513                 topfld_mc.sad = dt = 65536; /* infinity */
00514         else
00515                 fullsearch(engine, toporg,topref,ssmb,width<<1,
00516                                    i,j,sx,sy>>1,16,width,height>>1,
00517                                    &topfld_mc);
00518         dt = topfld_mc.sad;
00519         /* predict current field from bottom field */
00520         if (nobot)
00521                 botfld_mc.sad = db = 65536; /* infinity */
00522         else
00523                 fullsearch(engine, botorg,botref,ssmb,width<<1,
00524                                    i,j,sx,sy>>1,16,width,height>>1,
00525                                    &botfld_mc);
00526         db = botfld_mc.sad;
00527         /* Set correct field selectors */
00528         topfld_mc.fieldsel = 0;
00529         botfld_mc.fieldsel = 1;
00530         topfld_mc.fieldoff = 0;
00531         botfld_mc.fieldoff = width;
00532 
00533         /* same parity prediction (only valid if ipflag==0) */
00534         if (picture->pict_struct==TOP_FIELD)
00535         {
00536                 *bestsp = topfld_mc;
00537         }
00538         else
00539         {
00540                 *bestsp = botfld_mc;
00541         }
00542 
00543         /* select field prediction */
00544         if (dt<=db)
00545         {
00546                 *bestfld = topfld_mc;
00547         }
00548         else
00549         {
00550                 *bestfld = botfld_mc;
00551         }
00552 
00553 
00554         /* 16x8 motion compensation */
00555 
00556         /* predict upper half field from top field */
00557         if (notop)
00558                 topfld_mc.sad = dt = 65536;
00559         else
00560                 fullsearch(engine, toporg,topref,ssmb,width<<1,
00561                                    i,j,sx,sy>>1,8,width,height>>1,
00562                                     &topfld_mc);
00563         dt = topfld_mc.sad;
00564         /* predict upper half field from bottom field */
00565         if (nobot)
00566                 botfld_mc.sad = db = 65536;
00567         else
00568                 fullsearch(engine, botorg,botref,ssmb,width<<1,
00569                                    i,j,sx,sy>>1,8,width,height>>1,
00570                                     &botfld_mc);
00571         db = botfld_mc.sad;
00572 
00573         /* Set correct field selectors */
00574         topfld_mc.fieldsel = 0;
00575         botfld_mc.fieldsel = 1;
00576         topfld_mc.fieldoff = 0;
00577         botfld_mc.fieldoff = width;
00578 
00579         /* select prediction for upper half field */
00580         if (dt<=db)
00581         {
00582                 *best8u = topfld_mc;
00583         }
00584         else
00585         {
00586                 *best8u = botfld_mc;
00587         }
00588 
00589         /* predict lower half field from top field */
00590         /*
00591           N.b. For interlaced data width<<4 (width*16) takes us 8 rows
00592           down in the same field.  
00593           Thus for the fast motion data (2*2
00594           sub-sampled) we need to go 4 rows down in the same field.
00595           This requires adding width*4 = (width<<2).
00596           For the 4*4 sub-sampled motion data we need to go down 2 rows.
00597           This requires adding width = width
00598          
00599         */
00600         if (notop)
00601                 topfld_mc.sad = dt = 65536;
00602         else
00603                 fullsearch(engine, toporg,topref,&botssmb,
00604                                                 width<<1,
00605                                                 i,j+8,sx,sy>>1,8,width,height>>1,
00606                                    /* &imint,&jmint, &dt,*/ &topfld_mc);
00607         dt = topfld_mc.sad;
00608         /* predict lower half field from bottom field */
00609         if (nobot)
00610                 botfld_mc.sad = db = 65536;
00611         else
00612                 fullsearch(engine, botorg,botref,&botssmb,width<<1,
00613                                                 i,j+8,sx,sy>>1,8,width,height>>1,
00614                                    /* &iminb,&jminb, &db,*/ &botfld_mc);
00615         db = botfld_mc.sad;
00616         /* Set correct field selectors */
00617         topfld_mc.fieldsel = 0;
00618         botfld_mc.fieldsel = 1;
00619         topfld_mc.fieldoff = 0;
00620         botfld_mc.fieldoff = width;
00621 
00622         /* select prediction for lower half field */
00623         if (dt<=db)
00624         {
00625                 *best8l = topfld_mc;
00626         }
00627         else
00628         {
00629                 *best8l = botfld_mc;
00630         }
00631 }
00632 
00633 static void dpframe_estimate (motion_engine_t *engine,
00634         pict_data_s *picture,
00635         uint8_t *ref,
00636         subsampled_mb_s *ssmb,
00637         
00638         int i, int j, int iminf[2][2], int jminf[2][2],
00639         mb_motion_s *best_mc,
00640         int *imindmvp, int *jmindmvp,
00641         int *vmcp)
00642 {
00643         int pref,ppred,delta_x,delta_y;
00644         int is,js,it,jt,ib,jb,it0,jt0,ib0,jb0;
00645         int imins,jmins,imint,jmint,iminb,jminb,imindmv,jmindmv;
00646         int vmc,local_dist;
00647 
00648         /* Calculate Dual Prime distortions for 9 delta candidates
00649          * for each of the four minimum field vectors
00650          * Note: only for P pictures!
00651          */
00652 
00653         /* initialize minimum dual prime distortion to large value */
00654         vmc = INT_MAX;
00655 
00656         for (pref=0; pref<2; pref++)
00657         {
00658                 for (ppred=0; ppred<2; ppred++)
00659                 {
00660                         /* convert Cartesian absolute to relative motion vector
00661                          * values (wrt current macroblock address (i,j)
00662                          */
00663                         is = iminf[pref][ppred] - (i<<1);
00664                         js = jminf[pref][ppred] - (j<<1);
00665 
00666                         if (pref!=ppred)
00667                         {
00668                                 /* vertical field shift adjustment */
00669                                 if (ppred==0)
00670                                         js++;
00671                                 else
00672                                         js--;
00673 
00674                                 /* mvxs and mvys scaling*/
00675                                 is<<=1;
00676                                 js<<=1;
00677                                 if (picture->topfirst == ppred)
00678                                 {
00679                                         /* second field: scale by 1/3 */
00680                                         is = (is>=0) ? (is+1)/3 : -((-is+1)/3);
00681                                         js = (js>=0) ? (js+1)/3 : -((-js+1)/3);
00682                                 }
00683                                 else
00684                                         continue;
00685                         }
00686 
00687                         /* vector for prediction from field of opposite 'parity' */
00688                         if (picture->topfirst)
00689                         {
00690                                 /* vector for prediction of top field from bottom field */
00691                                 it0 = ((is+(is>0))>>1);
00692                                 jt0 = ((js+(js>0))>>1) - 1;
00693 
00694                                 /* vector for prediction of bottom field from top field */
00695                                 ib0 = ((3*is+(is>0))>>1);
00696                                 jb0 = ((3*js+(js>0))>>1) + 1;
00697                         }
00698                         else
00699                         {
00700                                 /* vector for prediction of top field from bottom field */
00701                                 it0 = ((3*is+(is>0))>>1);
00702                                 jt0 = ((3*js+(js>0))>>1) - 1;
00703 
00704                                 /* vector for prediction of bottom field from top field */
00705                                 ib0 = ((is+(is>0))>>1);
00706                                 jb0 = ((js+(js>0))>>1) + 1;
00707                         }
00708 
00709                         /* convert back to absolute half-pel field picture coordinates */
00710                         is += i<<1;
00711                         js += j<<1;
00712                         it0 += i<<1;
00713                         jt0 += j<<1;
00714                         ib0 += i<<1;
00715                         jb0 += j<<1;
00716 
00717                         if (is >= 0 && is <= (width-16)<<1 &&
00718                                 js >= 0 && js <= (height-16))
00719                         {
00720                                 for (delta_y=-1; delta_y<=1; delta_y++)
00721                                 {
00722                                         for (delta_x=-1; delta_x<=1; delta_x++)
00723                                         {
00724                                                 /* opposite field coordinates */
00725                                                 it = it0 + delta_x;
00726                                                 jt = jt0 + delta_y;
00727                                                 ib = ib0 + delta_x;
00728                                                 jb = jb0 + delta_y;
00729 
00730                                                 if (it >= 0 && it <= (width-16)<<1 &&
00731                                                         jt >= 0 && jt <= (height-16) &&
00732                                                         ib >= 0 && ib <= (width-16)<<1 &&
00733                                                         jb >= 0 && jb <= (height-16))
00734                                                 {
00735                                                         /* compute prediction error */
00736                                                         local_dist = (*pbdist2)(
00737                                                                 ref + (is>>1) + (width<<1)*(js>>1),
00738                                                                 ref + width + (it>>1) + (width<<1)*(jt>>1),
00739                                                                 ssmb->mb,             /* current mb location */
00740                                                                 width<<1,       /* adjacent line distance */
00741                                                                 is&1, js&1, it&1, jt&1, /* half-pel flags */
00742                                                                 8);             /* block height */
00743                                                         local_dist += (*pbdist2)(
00744                                                                 ref + width + (is>>1) + (width<<1)*(js>>1),
00745                                                                 ref + (ib>>1) + (width<<1)*(jb>>1),
00746                                                                 ssmb->mb + width,     /* current mb location */
00747                                                                 width<<1,       /* adjacent line distance */
00748                                                                 is&1, js&1, ib&1, jb&1, /* half-pel flags */
00749                                                                 8);             /* block height */
00750 
00751                                                         /* update delta with least distortion vector */
00752                                                         if (local_dist < vmc)
00753                                                         {
00754                                                                 imins = is;
00755                                                                 jmins = js;
00756                                                                 imint = it;
00757                                                                 jmint = jt;
00758                                                                 iminb = ib;
00759                                                                 jminb = jb;
00760                                                                 imindmv = delta_x;
00761                                                                 jmindmv = delta_y;
00762                                                                 vmc = local_dist;
00763                                                         }
00764                                                 }
00765                                         }  /* end delta x loop */
00766                                 } /* end delta y loop */
00767                         }
00768                 }
00769         }
00770 
00771         /* TODO: This is now likely to be obsolete... */
00772         /* Compute L1 error for decision purposes */
00773         local_dist = (*pbdist1)(
00774                 ref + (imins>>1) + (width<<1)*(jmins>>1),
00775                 ref + width + (imint>>1) + (width<<1)*(jmint>>1),
00776                 ssmb->mb,
00777                 width<<1,
00778                 imins&1, jmins&1, imint&1, jmint&1,
00779                 8);
00780 //printf("motion 1 %p\n", pbdist1);
00781         local_dist += (*pbdist1)(
00782                 ref + width + (imins>>1) + (width<<1)*(jmins>>1),
00783                 ref + (iminb>>1) + (width<<1)*(jminb>>1),
00784                 ssmb->mb + width,
00785                 width<<1,
00786                 imins&1, jmins&1, iminb&1, jminb&1,
00787                 8);
00788 //printf("motion 2\n");
00789 
00790         best_mc->sad = local_dist;
00791         best_mc->pos.x = imins;
00792         best_mc->pos.y = jmins;
00793         *vmcp = vmc;
00794         *imindmvp = imindmv;
00795         *jmindmvp = jmindmv;
00796 //printf("motion 2\n");
00797 }
00798 
00799 static void dpfield_estimate(motion_engine_t *engine,
00800         pict_data_s *picture,
00801         uint8_t *topref,
00802         uint8_t *botref, 
00803         uint8_t *mb,
00804         int i, int j, int imins, int jmins, 
00805         mb_motion_s *bestdp_mc,
00806         int *vmcp
00807         )
00808 
00809 {
00810         uint8_t *sameref, *oppref;
00811         int io0,jo0,io,jo,delta_x,delta_y,mvxs,mvys,mvxo0,mvyo0;
00812         int imino,jmino,imindmv,jmindmv,vmc_dp,local_dist;
00813 
00814         /* Calculate Dual Prime distortions for 9 delta candidates */
00815         /* Note: only for P pictures! */
00816 
00817         /* Assign opposite and same reference pointer */
00818         if (picture->pict_struct==TOP_FIELD)
00819         {
00820                 sameref = topref;    
00821                 oppref = botref;
00822         }
00823         else 
00824         {
00825                 sameref = botref;
00826                 oppref = topref;
00827         }
00828 
00829         /* convert Cartesian absolute to relative motion vector
00830          * values (wrt current macroblock address (i,j)
00831          */
00832         mvxs = imins - (i<<1);
00833         mvys = jmins - (j<<1);
00834 
00835         /* vector for prediction from field of opposite 'parity' */
00836         mvxo0 = (mvxs+(mvxs>0)) >> 1;  /* mvxs / / */
00837         mvyo0 = (mvys+(mvys>0)) >> 1;  /* mvys / / 2*/
00838 
00839                         /* vertical field shift correction */
00840         if (picture->pict_struct==TOP_FIELD)
00841                 mvyo0--;
00842         else
00843                 mvyo0++;
00844 
00845                         /* convert back to absolute coordinates */
00846         io0 = mvxo0 + (i<<1);
00847         jo0 = mvyo0 + (j<<1);
00848 
00849                         /* initialize minimum dual prime distortion to large value */
00850         vmc_dp = 1 << 30;
00851 
00852         for (delta_y = -1; delta_y <= 1; delta_y++)
00853         {
00854                 for (delta_x = -1; delta_x <=1; delta_x++)
00855                 {
00856                         /* opposite field coordinates */
00857                         io = io0 + delta_x;
00858                         jo = jo0 + delta_y;
00859 
00860                         if (io >= 0 && io <= (width-16)<<1 &&
00861                                 jo >= 0 && jo <= (height2-16)<<1)
00862                         {
00863                                 /* compute prediction error */
00864                                 local_dist = (*pbdist2)(
00865                                         sameref + (imins>>1) + width2*(jmins>>1),
00866                                         oppref  + (io>>1)    + width2*(jo>>1),
00867                                         mb,             /* current mb location */
00868                                         width2,         /* adjacent line distance */
00869                                         imins&1, jmins&1, io&1, jo&1, /* half-pel flags */
00870                                         16);            /* block height */
00871 
00872                                 /* update delta with least distortion vector */
00873                                 if (local_dist < vmc_dp)
00874                                 {
00875                                         imino = io;
00876                                         jmino = jo;
00877                                         imindmv = delta_x;
00878                                         jmindmv = delta_y;
00879                                         vmc_dp = local_dist;
00880                                 }
00881                         }
00882                 }  /* end delta x loop */
00883         } /* end delta y loop */
00884 
00885         /* Compute L1 error for decision purposes */
00886         bestdp_mc->sad =
00887                 (*pbdist1)(
00888                         sameref + (imins>>1) + width2*(jmins>>1),
00889                         oppref  + (imino>>1) + width2*(jmino>>1),
00890                         mb,             /* current mb location */
00891                         width2,         /* adjacent line distance */
00892                         imins&1, jmins&1, imino&1, jmino&1, /* half-pel flags */
00893                         16);            /* block height */
00894 
00895         bestdp_mc->pos.x = imindmv;
00896         bestdp_mc->pos.x = imindmv;
00897         *vmcp = vmc_dp;
00898 }
00899 
00900 
00901 /* 
00902  *   Vectors of motion compensations 
00903  */
00904 
00905 /*
00906         Take a vector of motion compensations and repeatedly make passes
00907         discarding all elements whose sad "weight" is above the current mean weight.
00908 */
00909 
00910 static void sub_mean_reduction( mc_result_s *matches, int len, 
00911                                                                 int times,
00912                                                             int *newlen_res, int *minweight_res)
00913 {
00914         int i,j;
00915         int weight_sum;
00916         int mean_weight;
00917         int min_weight = 100000;
00918         if( len == 0 )
00919         {
00920                 *minweight_res = 100000;
00921                 *newlen_res = 0;
00922                 return;
00923         }
00924 
00925         for(;;)
00926         {
00927                 weight_sum = 0;
00928                 for( i = 0; i < len ; ++i )
00929                         weight_sum += matches[i].weight;
00930                 mean_weight = weight_sum / len;
00931                 
00932                 if( times <= 0)
00933                         break;
00934                         
00935                 j = 0;
00936                 for( i =0; i < len; ++i )
00937                 {
00938                         if( matches[i].weight <= mean_weight )
00939                         {
00940                                 if( times == 1)
00941                                 {
00942                                         min_weight = matches[i].weight ;
00943                                 }
00944                                 matches[j] = matches[i];
00945                                 ++j;
00946                         }
00947                 }
00948                 len = j;
00949                 --times;
00950         }
00951         *newlen_res = len;
00952         *minweight_res = mean_weight;
00953 }
00954 
00955 /*
00956   Build a vector of the top   4*4 sub-sampled motion compensations in the box
00957   (ilow,jlow) to (ihigh,jhigh).
00958 
00959         The algorithm is as follows:
00960         1. coarse matches on an 8*8 grid of positions are collected that fall below
00961         a (very conservative) sad threshold (basically 50% more than moving average of
00962         the mean sad of such matches).
00963         
00964         2. The worse than-average matches are discarded.
00965         
00966         3. The remaining coarse matches are expanded with the left/lower neighbouring
00967         4*4 grid matches. Again only those better than a threshold (this time the mean
00968         of the 8*8 grid matches are retained.
00969         
00970         4. Multiple passes are made discarding worse than-average matches.  The number of
00971         passes is specified by the user.  The default it 2 (leaving roughly 1/4 of the matches).
00972         
00973         The net result is very fast and find good matches if they're to be found.  I.e. the
00974         penalty over exhaustive search is pretty low.
00975         
00976         NOTE: The "discard below average" trick depends critically on having some variation in
00977         the matches.  The slight penalty imposed for distant matches (reasonably since the 
00978         motion vectors have to be encoded) is *vital* as otherwise pathologically bad
00979         performance results on highly uniform images.
00980         
00981         TODO: We should probably allow the user to eliminate the initial thinning of 8*8
00982         grid matches if ultimate quality is demanded (e.g. for low bit-rate applications).
00983 
00984 */
00985 
00986 
00987 static int build_sub44_mcomps(motion_engine_t *engine,
00988          int ilow, int jlow, int ihigh, int jhigh, 
00989                                                         int i0, int j0,
00990                                                                 int null_mc_sad,
00991                                                         uint8_t *s44org, uint8_t *s44blk, int qlx, int qh )
00992 {
00993         uint8_t *s44orgblk;
00994         int istrt = ilow-i0;
00995         int jstrt = jlow-j0;
00996         int iend = ihigh-i0;
00997         int jend = jhigh-j0;
00998         int mean_weight;
00999         int threshold;
01000 
01001 #ifdef X86_CPU
01002 
01003         /*int rangex, rangey;
01004         static int rough_num_mcomps;
01005         static mc_result_s rough_mcomps[MAX_44_MATCHES];
01006         int k;
01007         */
01008 #else
01009         int i,j;
01010         int s1;
01011         uint8_t *old_s44orgblk;
01012 #endif
01013         /* N.b. we may ignore the right hand block of the pair going over the
01014            right edge as we have carefully allocated the buffer oversize to ensure
01015            no memory faults.  The later motion compensation calculations
01016            performed on the results of this pass will filter out
01017            out-of-range blocks...
01018         */
01019 
01020 
01021         engine->sub44_num_mcomps = 0;
01022         
01023         threshold = 6*null_mc_sad / (4*4*mc_44_red);
01024         s44orgblk = s44org+(ilow>>2)+qlx*(jlow>>2);
01025         
01026         /* Exhaustive search on 4*4 sub-sampled data.  This is affordable because
01027                 (a)     it is only 16th of the size of the real 1-pel data 
01028                 (b) we ignore those matches with an sad above our threshold.    
01029         */
01030 #ifndef X86_CPU
01031 
01032                 /* Invariant:  s44orgblk = s44org+(i>>2)+qlx*(j>>2) */
01033                 s44orgblk = s44org+(ilow>>2)+qlx*(jlow>>2);
01034                 for( j = jstrt; j <= jend; j += 4 )
01035                 {
01036                         old_s44orgblk = s44orgblk;
01037                         for( i = istrt; i <= iend; i += 4 )
01038                         {
01039                                 s1 = ((*pdist44)( s44orgblk,s44blk,qlx,qh) & 0xffff) + abs(i-i0) + abs(j-j0);
01040                                 if( s1 < threshold )
01041                                 {
01042                                         engine->sub44_mcomps[engine->sub44_num_mcomps].x = i;
01043                                         engine->sub44_mcomps[engine->sub44_num_mcomps].y = j;
01044                                         engine->sub44_mcomps[engine->sub44_num_mcomps].weight = s1 ;
01045                                         ++engine->sub44_num_mcomps;
01046                                 }
01047                                 s44orgblk += 1;
01048                         }
01049                         s44orgblk = old_s44orgblk + qlx;
01050                 }
01051                         
01052 #else
01053 
01054                 engine->sub44_num_mcomps
01055                         = (*pmblock_sub44_dists)( s44orgblk, s44blk,
01056                                                                   istrt, jstrt,
01057                                                                   iend, jend, 
01058                                                                   qh, qlx, 
01059                                                                   threshold,
01060                                                                   engine->sub44_mcomps);
01061 
01062 #endif  
01063                 /* If we're really pushing quality we reduce once otherwise twice. */
01064                         
01065                 sub_mean_reduction( engine->sub44_mcomps, engine->sub44_num_mcomps, 1+(mc_44_red>1),
01066                                                     &engine->sub44_num_mcomps, &mean_weight);
01067 
01068 
01069         return engine->sub44_num_mcomps;
01070 }
01071 
01072 
01073 /* Build a vector of the best 2*2 sub-sampled motion
01074   compensations using the best 4*4 matches as starting points.  As
01075   with with the 4*4 matches We don't collect them densely as they're
01076   just search start