unsigned int b1get(a,b) register char * a; register unsigned int b; {
  a += b / 8;
  return (*a >> (7 - (b & 7))) & 1;
}

unsigned int b32get(a,b,c)
     register char * a;
     register unsigned int b;
     register unsigned char c; {
     register unsigned char t;
     if (c > 32) abort();

     b += (((int) a) & 3) << 3;
     a = (char *) (((int) a) & ~3);
     a += (b >>5) << 2;
     b = b & 31;

     t = b + c;
     if (t <= 32) { /* long word load */
       return ((* (unsigned long *) a) << b) >> (32 - c);
     }
     else { /* spans long word */
       return (((* (unsigned long *) a) << b) |
	      ((* (unsigned long *) (a+4)) >> (32-b))) >> (32 - c);
     }
     
}

unsigned int b32put(a,b,c,d)
     char * a;
     unsigned int b;
     unsigned char c;
     register unsigned int d; {
     unsigned char t;
     if (c > 32) abort();

     b += (((int) a) & 3) << 3;
     a = (char *) (((int) a) & ~3);
     a += (b >>5) << 2;
     b = b & 31;

     t = b + c;
     if (t <= 32) { /* long word load */
       d = (d << (32 - t)) & (1 << (32 - b) - 1);
       b = (* (unsigned long *) a) & ~d;
       (* (unsigned long *) a) = b | d;
     }
     else { /* spans long word */
       int u = (d >> (t - 32)) & (- (1 << (32 - b)));
       int v = (* (unsigned long *) a) & ~u;
       (* (unsigned long *) a) = v | u;
       u = d << (64 - t);
       a += 4;
       v = (* (unsigned long *) a) & ~u;
       (* (unsigned long *) a) = v | u;
     }
}

typedef unsigned int cell;
bN(a,b,d,e,c)
     register cell * a; /* source address */
     int b;             /* bit offset */
     cell c;            /* bit count */
     register cell * d; /* destination address */
     int e;             /* bit offset */
{
  register cell next, old, wordc;
  register unsigned char valid, Nvalid;

  
    /* normalize a and b */
    b += (((int) a) & 3) << 3;
    a = (cell *) (((int) a) & ~3);
    a += (b >>5);
    b = b & 31;
    
    /* normalize d and e */
    e += (((int) d) & 3) << 3;
    d = (cell *) (((int) d) & ~3);
    d += (e >>5);
    e = e & 31;

    /** check direction of move **/
    if (a >= d /* backwards */ ||
       (d - a) > ((c >> 5) + 1) /* non-overlapping */)
      {
	if (b < e)
	  { /* We have extra source bits */
	    valid = e - b;
	    old = *a++;
	    next = (old << b) >> e;
	    /* next has what we are ready to store from */
	    /* old has the left over bits at the lower part of the word */
	  }
	else
	  { /* We have insufficient source bits (maybe) */
	    valid = 32 + e - b;
	    next = (*a++ << b) >> e;
	    if (c > 32 - b)
	      { /* We need more bits */
		old = *a++;
		next = next | (old >> valid);
	      }
	    
	  }
	/* At this point, NEXT has what we are ready to store,
	   and OLD has VALID bits in it. */
	/* We need to store the bits into *D, masking appropriately. */
	if (c > (32 - e))
	  {
	    *d = ((*d >> (32 - e)) << (32 - e)) | next;
	    d++;
	    c -= (32 - e);
	    wordc = c >> 5;
	    c = c & 31;
	    Nvalid = 32 - valid;
	    /* A is ready, D is ready, OLD is ready, VALID is ready,
	       WORDC is ready, C is ready. */
	    while (wordc > 0) {
	      next = *a++;
	      old = (old << Nvalid) | (next >> valid);
	      *d++ = old;
	      old = next;
	      wordc = wordc - 1;
	    }
	    /* We've got less than 32 bits left to store */
	    if (c > valid)
	      {
		/* We need extra bits */
		next = *a;
		next = (old << Nvalid) | (next >> valid);
	      }
	    else
	      next = old << Nvalid;
	    /* Now we need to store c bits in high half of *D */
	    wordc = - (1 << (32 - c));
	  }
	else
	  {
	    /* Just store a little bit -- first form mask */
	    wordc = (1 << (32 - e)) - (1 << (32 - e - c));
	  }
	next = next & wordc;
	*d = (*d & ~wordc) | next;
	return;
      }
    else /* overlapping forwards */
      {
	/* Backnormalize */
	b = b + c;
	e = e + c;

	/* normalize a and b */
        a += (b >>5);
        b = b & 31;
	
	/* normalize d and e */
        d += (e >>5);
        e = e & 31;

	b = (32 - b);
	e = (32 - e);

	if (b < e)
	  { /* We have extra source bits */
	    valid = e - b;
	    old = *a--;
	    next = (old >> b) << e;
	    /* next has what we are ready to store from */
	    /* old has the left over bits at the high part of the word */
	  }
	else
	  { /* We have insufficient source bits (maybe) */
	    valid = 32 + e - b;
	    next = (*a-- >> b) << e;
	    if (c > 32 - b)
	      { /* We need more bits */
		old = *a--;
		next = next | (old << valid);
	      }
	    
	  }

	/* At this point, NEXT has what we are ready to store,
	   and OLD has VALID bits in it. */
	/* We need to store the bits into *D, masking appropriately. */
	if (c > (32 - e)) /* Is there enough to store to the boundary? */
	  {
	    *d = ((*d << (32 - e)) >> (32 - e)) | next;
	    d--;
	    c -= (32 - e);
	    wordc = c >> 5;
	    c = c & 31;
	    Nvalid = 32 - valid;

	    /* A is ready, D is ready, OLD is ready, VALID is ready,
	       WORDC is ready, C is ready. */
	    while (wordc > 0) {
	      next = *a--;
	      old = (old >> Nvalid) | (next << valid);
	      *d-- = old;
	      old = next;
	      wordc = wordc - 1;
	    }
	    /* We've got less than 32 bits left to store */
	    if (c > valid)
	      {
		/* We need extra bits */
		next = *a;
		next = (old >> Nvalid) | (next << valid);
	      }
	    else
	      next = old >> Nvalid;
	    /* Now we need to store c bits in low half of *D */
	    wordc = (1 << c) - 1;
	  }
	else
	  {
	    /* Just store a little bit -- first form mask */
	    wordc = (1 << (e + c)) - (1 << e) ;
	  }
	next = next & wordc;
	*d = (*d & ~wordc) | next;
	return;
      } 
}

#ifdef TEST
char chars[] = "0123456789abcdef";
report(a) char * a; {
  printf("%s\n", a);
}

long dummy;
char sample[] =
"1234567890abcdefghijABCDEFGHIJ1234567890";

main() {
  char a[100];
  char b[100];
  char c[100];
  char d[100];

  int i,j,k;
  
  report(sample);
  
  for (i = 0; i < 128; i ++) { /* iterate over size */
    for (j = 0; j < 64; j ++) { /* iterate over source */
      for (k = -32; k < 64; k ++) { /* iterate over destination */
      bN(sample, 0, b, 0, 320);
      bN(sample, 0, a, 0, 320);
      bN(a, j, a + 4, k,i);
      bN(a + 4, k, b, j, i);
      if (strcmp(sample, b) != 0)
	printf("Failed at i = %d, j = %d, k = %d, b = \n%s\n", i, j, k, b);
      }
    }
    printf("Succeeded at size = %d\n", i);
  }
}
#endif
