/*
 * Byte-emitting command line utility.
 *
 * Usage:  <util> [-d | -h] argument...
 *
 * Each non-option argument describes some bytes:
 *
 *   l<text>    the bytes of <text> verbatim ("l" alone, or an empty
 *              argument, contributes nothing);
 *   u<number>  the multi-byte encoding of code point <number> as produced by
 *              generate_utf8_encoding() (at most MAX_UCS_VALUE);
 *   <number>   a single value, written with putchar().
 *
 * <number> is a run of digits optionally followed by a base suffix:
 * d(ecimal), h(exadecimal), o(ctal) or b(inary).  If the last character is a
 * decimal digit the number is read as decimal.  Hexadecimal digits must be
 * all upper case or all lower case within one argument.
 *
 * Without options the bytes are written to standard output.  With -d or -h
 * (which must precede all other arguments; the last one given wins) the
 * values are printed instead, in decimal or in upper case hexadecimal with
 * at least two digits, separated by single spaces and followed by a newline.
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define BAD_VALUE 17                /* returned by hexdigit() for a non hex digit */
#define MAX_ARG_VALUE 4294967295llu /* This is 2^32 - 1 */
#define MAX_UCS_VALUE 0x7FFFFFFFllu /* largest code point accepted after 'u' */

/* How one command line argument is to be interpreted. */
enum type { LITERAL, EMPTY, CODE, UNICODE };

/* Tells hexdigit() whether a digit is the first one of a new argument. */
enum arg { newarg, prevarg };

/* Letter case of the hexadecimal digits seen so far in one argument. */
enum lcase { nocase, uppercase, lowercase };

/* Number of the argument being processed; used in diagnostics. */
static int argno;

/* argv[0]; prefixes every diagnostic. */
static const char *util_name;

#define MAX_BYTES 6

/* Output of generate_utf8_encoding(): bytes[0 .. number_of_bytes - 1]. */
static int bytes[MAX_BYTES], number_of_bytes;

// FUNCTIONS

unsigned int hexdigit(const char c, const enum arg a);
unsigned int digitvalue(const char c, unsigned int base, const enum arg a);
long unsigned int argvalue(const char *p, const unsigned int base);
long unsigned int getargvalue(char *p);
void generate_utf8_encoding(long unsigned code_point);

/*
 * Encode code_point into the file-scope array bytes[] and store the number of
 * bytes produced in number_of_bytes.
 *
 * Values up to 127 give one byte.  Larger values give 2 to 6 bytes: every
 * trailing byte is 128 | (6 payload bits) and the leading byte starts with
 * as many 1 bits as there are bytes in the sequence.
 *
 * code_point must not exceed MAX_UCS_VALUE (main() checks this).  The byte
 * count is capped at MAX_BYTES so that a larger value can never write outside
 * bytes[]; the encoding produced for such a value is not meaningful.
 */
void generate_utf8_encoding(long unsigned code_point)
{
    long unsigned limit = 0x7FF; /* largest value representable in number_of_bytes bytes */
    int lead = 128;              /* marker bits of the leading byte */
    int next_bit = 64;           /* next marker bit to add to lead */
    int i;

    if (code_point <= 127) {
        number_of_bytes = 1;
        bytes[0] = (int) code_point;
        return;
    }

    /* Every additional byte contributes 5 more payload bits overall. */
    number_of_bytes = 2;
    while (code_point > limit && number_of_bytes < MAX_BYTES) {
        limit = (limit << 5) | 31;
        number_of_bytes++;
    }

    /* Fill the trailing bytes from the end, 6 bits at a time. */
    for (i = number_of_bytes - 1; i > 0; i--) {
        bytes[i] = (int) (128 | (code_point & 63));
        code_point >>= 6;
        lead |= next_bit;
        next_bit >>= 1;
    }
    bytes[0] = (int) (lead | code_point);
}

/*
 * Convert the numerical part of a command line argument to its value.
 *
 * p points to the beginning of the numerical part and is expected to be a
 * non-empty string.  A trailing base suffix, if present, is overwritten with
 * a terminating zero, so the caller's string is modified.
 *
 * Exits with EXIT_FAILURE after printing a diagnostic if the suffix is
 * unknown, there are no digits, a digit is invalid or the value exceeds
 * MAX_ARG_VALUE.
 */
long unsigned int getargvalue(char *p)
{
    unsigned int base;
    char *last;

    /* Callers never pass an empty string; guard anyway so that we never read
       past the terminator. */
    if (*p == '\0') {
        fprintf(stderr, "%s : Argument number %d contained no digits\n",
                util_name, argno);
        exit(EXIT_FAILURE);
    }

    /* Find the last character: either a decimal digit or the base suffix. */
    last = p;
    while (last[1] != '\0')
        last++;

    if (*last >= '0' && *last <= '9')
        return argvalue(p, 10);

    switch (*last) {
    case 'b':
        base = 2;
        break;
    case 'o':
        base = 8;
        break;
    case 'd':
        base = 10;
        break;
    case 'h':
        base = 16;
        break;
    default:
        fprintf(stderr, "%s : Unknown base in argument number %d\nAcceptable suffixes "
                "are d(ecimal) , h(exadecimal) , o(ctal) , b(inary)\n",
                util_name, argno);
        exit(EXIT_FAILURE);
    }

    /* The argument consisted of nothing but the suffix. */
    if (last == p) {
        fprintf(stderr, "%s : Argument number %d contained no digits\n",
                util_name, argno);
        exit(EXIT_FAILURE);
    }

    *last = '\0';
    return argvalue(p, base);
}

/*
 * Return the value of the digit string p read in the given base.
 *
 * p must contain at least one character.  Exits with EXIT_FAILURE after
 * printing a diagnostic if a digit is invalid for the base or the running
 * value exceeds MAX_ARG_VALUE.
 */
long unsigned int argvalue(const char *p, const unsigned int base)
{
    /* unsigned long long is at least 64 bits wide, so base * total cannot
       overflow while total <= MAX_ARG_VALUE and base <= 16. */
    unsigned long long total = digitvalue(*p, base, newarg);

    for (p++; *p != '\0'; p++) {
        total = base * total + digitvalue(*p, base, prevarg);
        if (total > MAX_ARG_VALUE) {
            fprintf(stderr, "%s : Value of argument number %d is too large \n",
                    util_name, argno);
            exit(EXIT_FAILURE);
        }
    }
    return (long unsigned int) total;
}

/*
 * Return the numerical value of digit c in the given base (2, 8, 10 or 16).
 *
 * a says whether c is the first digit of an argument; it is only forwarded
 * to hexdigit().  Exits with EXIT_FAILURE after printing a diagnostic if c
 * is not a digit of that base, or if base is not one of the supported ones.
 */
unsigned int digitvalue(const char c, const unsigned int base, const enum arg a)
{
    const char *base_name;
    unsigned int d;

    switch (base) {
    case 2:
        if (c == '0' || c == '1')
            return c - '0';
        base_name = "binary";
        break;
    case 8:
        if (c >= '0' && c < '8')
            return c - '0';
        base_name = "octal";
        break;
    case 10:
        if (c >= '0' && c <= '9')
            return c - '0';
        base_name = "decimal";
        break;
    case 16:
        d = hexdigit(c, a);
        if (d != BAD_VALUE)
            return d;
        base_name = "hexadecimal";
        break;
    default:
        /* Not reachable from getargvalue(); makes sure the function can
           never fall off its end without returning a value. */
        fprintf(stderr, "%s : Internal error , unsupported base %u\n",
                util_name, base);
        exit(EXIT_FAILURE);
    }

    fprintf(stderr, "%s : Invalid %s digit %c in argument number %d\n",
            util_name, base_name, c, argno);
    exit(EXIT_FAILURE);
}

/*
 * Return the value (0 to 15) of hexadecimal digit c, or BAD_VALUE if c is
 * not a hexadecimal digit.
 *
 * The letter case used within one argument is remembered between calls:
 * a == newarg resets that memory, and mixing upper and lower case letters in
 * the same argument prints a diagnostic and exits with EXIT_FAILURE.
 */
unsigned int hexdigit(const char c, const enum arg a)
{
    static enum lcase pc; /* case of the letters seen so far in this argument */
    static const char lower[] = "abcdef";
    static const char upper[] = "ABCDEF";
    enum lcase seen = nocase;
    unsigned int i;

    if (a == newarg)
        pc = nocase;

    if (c >= '0' && c <= '9')
        return c - '0';

    /* Look the letter up instead of doing arithmetic on character codes. */
    for (i = 0; i < 6; i++) {
        if (c == lower[i]) {
            seen = lowercase;
            break;
        }
        if (c == upper[i]) {
            seen = uppercase;
            break;
        }
    }
    if (seen == nocase)
        return BAD_VALUE;

    if (pc == nocase) {
        pc = seen;
    } else if (pc != seen) {
        fprintf(stderr, "%s : Both upper and lower case hex digits appear in"
                " argument number %d\n", util_name, argno);
        exit(EXIT_FAILURE);
    }
    return 10 + i;
}

/* Print one value in decimal, or in upper case hex with at least 2 digits. */
static void print_value(long unsigned int value, int show_dec)
{
    if (show_dec)
        printf("%lu", value);
    else
        printf("%.2lX", value);
}

/*
 * Consume the leading options.  Returns the index of the first non-option
 * argument, or argc if there is none.  Leaves argno equal to that index.
 */
static int parse_options(int argc, char **argv, int *show_dec, int *show_hex)
{
    const char *opt;

    *show_dec = *show_hex = 0;
    for (argno = 1; argno < argc; argno++) {
        opt = argv[argno];
        if (*opt != '-')
            break;
        if (opt[1] == 'd' && opt[2] == 0) {
            *show_dec = 1;
            *show_hex = 0;
        } else if (opt[1] == 'h' && opt[2] == 0) {
            *show_dec = 0;
            *show_hex = 1;
        } else {
            fprintf(stderr, "%s : Option %s not recognised\n", util_name, opt);
            exit(EXIT_FAILURE);
        }
    }
    return argno;
}

/*
 * Classify argv[argno .. argc-1] into kind[] and, for numerical arguments,
 * store their values in a[].  Exits with a diagnostic on any invalid
 * argument.
 */
static void classify_arguments(int argc, char **argv, enum type *kind,
                               long unsigned int *a)
{
    char *d;
    int i;

    for (i = 0; argno < argc; argno++, i++) {
        d = argv[argno];
        if (*d == '-') {
            fprintf(stderr, "%s : Argument number %d starts with - .Options are not "
                    "allowed after non option arguments have been "
                    "encountered\n", util_name, argno);
            exit(EXIT_FAILURE);
        }
        if (*d == 0) {
            kind[i] = EMPTY;
        } else if (*d == 'l') {
            kind[i] = (d[1] == 0) ? EMPTY : LITERAL;
        } else if (*d == 'u') {
            if (d[1] == 0) {
                fprintf(stderr, "%s : In argument number %d the initial 'u' was "
                        "not followed by anything\n", util_name, argno);
                exit(EXIT_FAILURE);
            }
            kind[i] = UNICODE;
            a[i] = getargvalue(d + 1);
            if (a[i] > MAX_UCS_VALUE) {
                fprintf(stderr, "%s : In argument number %d the UCS value was too "
                        "large ; maximum allowed is %llu\n",
                        util_name, argno, MAX_UCS_VALUE);
                exit(EXIT_FAILURE);
            }
        } else {
            kind[i] = CODE;
            a[i] = getargvalue(d);
        }
    }
}

/*
 * -d / -h mode: print the value of every byte that would have been written,
 * separated by single spaces and followed by a newline.  args[j] is the
 * argument that kind[j] and a[j] describe.
 */
static void print_values(int count, char **args, const enum type *kind,
                         const long unsigned int *a, int show_dec)
{
    const char *s;
    int printed_previously = 0, i, j;

    for (j = 0; j < count; j++) {
        if (kind[j] == EMPTY)
            continue;
        if (printed_previously)
            putchar(' ');
        else
            printed_previously = 1;

        switch (kind[j]) {
        case LITERAL:
            /* A LITERAL always has at least one character after the 'l'. */
            s = args[j] + 1;
            print_value((unsigned char) *s, show_dec); // [2]
            while (*++s) {
                putchar(' ');
                print_value((unsigned char) *s, show_dec); // [2]
            }
            break;
        case CODE:
            print_value(a[j], show_dec);
            break;
        case UNICODE:
            generate_utf8_encoding(a[j]);
            print_value((long unsigned int) bytes[0], show_dec);
            for (i = 1; i < number_of_bytes; i++) {
                putchar(' ');
                print_value((long unsigned int) bytes[i], show_dec);
            }
            break;
        default:
            break;
        }
    }
    putchar('\n');
}

/*
 * Default mode: write the bytes themselves to standard output.  offset is
 * the argv index of args[0]; it is only used in diagnostics.
 */
static void write_bytes(int count, char **args, const enum type *kind,
                        const long unsigned int *a, int offset)
{
    int i, j;

    for (j = 0; j < count; j++) {
        switch (kind[j]) {
        case EMPTY:
            break;
        case LITERAL:
            printf("%s", args[j] + 1);
            break;
        case UNICODE:
            generate_utf8_encoding(a[j]);
            for (i = 0; i < number_of_bytes; i++)
                putchar(bytes[i]);
            break;
        default:
            /* NOTE(review): putchar() converts its argument to unsigned char,
               so values from 256 up to INT_MAX are silently reduced to their
               low-order byte.  Confirm whether the limit was meant to be
               UCHAR_MAX. */
            if (a[j] > INT_MAX) {
                fprintf(stderr, "%s : Argument number %d is too large ; skipping\n",
                        util_name, j + offset);
            } else {
                putchar((int) a[j]);
            }
            break;
        }
    }
}

/*
 * Entry point.  Returns 0 on success, including when there is nothing to
 * do; every error path terminates through exit(EXIT_FAILURE).
 */
int main(int argc, char **argv)
{
    enum type *kind;
    long unsigned int *a;
    int show_dec, show_hex, offset, count;

    if (argc == 0)
        return 0;
    util_name = argv[0];

    if (MAX_ARG_VALUE > ULONG_MAX) {
        fprintf(stderr,
                "%s , WARNING :\nThe maximum value for an unsigned long on this machine "
                "is %lu\nThis utility is designed for a maximum value of at least %llu "
                "therefore it may not work correctly\n",
                util_name, ULONG_MAX, MAX_ARG_VALUE);
    }

    offset = parse_options(argc, argv, &show_dec, &show_hex);
    if (offset >= argc)
        return 0; /* only options were given */
    count = argc - offset;

    kind = malloc((size_t) count * sizeof *kind);
    a = malloc((size_t) count * sizeof *a);
    if (kind == 0 || a == 0) {
        fprintf(stderr, "%s : malloc() failure :-(\n", util_name);
        exit(EXIT_FAILURE);
    }

    classify_arguments(argc, argv, kind, a);

    if (show_dec + show_hex > 0)
        print_values(count, argv + offset, kind, a, show_dec);
    else
        write_bytes(count, argv + offset, kind, a, offset);

    free(kind);
    free(a);
    return 0;
}

/*
 NOTES

 [1]

 [2] The "(unsigned char)" exists because without it, on a platform where
     char has the same range as signed char, negative values of *d would
     result in printing values which are very likely bogus. For example,
     consider a platform where negative numbers are represented using two's
     complement, char has range -128 to 127, unsigned int has range
     0 to 2**32 - 1 and the command line from where we call this utility uses
     encoding ISO 8859-1. Then if we call the utility with the arguments
     -d and l followed by the pound symbol, then without "(unsigned char)" it
     would print 4294967203 (the ISO 8859-1 encoding of the pound symbol is
     163, the 8 bit two's complement value of the bit pattern corresponding
     to 163 is -128 + 163-128 = -93 and 2**32 + (-93) = 4294967203).

     Of course, if there are any character encodings which use negative
     values then the addition of "(unsigned char)" might cause the utility to
     print (using the -d or -h options) bogus values, but I doubt there are
     any such encodings.
*/