Last week, I tried to use Valgrind to identify potential memory related bugs, since segmentation faults occured randomly in a Python C library. However, Valgrind failed to start and displayed the following error message:

继续阅读

vector 测试

2014年9月03日 16:11

写个简单的东西来测试一下数据很多时候几种创建 vector 的方法,结果有点意思:

继续阅读

 

继续阅读

test.c

2014年9月03日 00:14

test.c
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

void dump_line(const unsigned char* buf, int w, int l)
{
#define YYYGET(X)       ( X >= 32 && X <= 126) ? X : '.'
    unsigned int i = 0;
    printf("%08x: ", l);
    for (; i < w; ++i)
        printf((i % 8 == 7) ? "%02x  " : "%02x ", *(buf+i));
    if (w < 0x10)
    {
        for (i = 0; i < 0x10 - w; ++i) printf("   ");
        printf("  ");
    }
    printf ("|");
    for (i = 0; i < w; ++i)
        printf ("%c", YYYGET(*(buf+i)));
    if (w < 0x10)
        for (i = 0; i < 0x10 - w; ++i) printf(" ");
    printf ("|\n");
#undef YYYGET
}

void dump_buffer(const unsigned char* buf, int max)
{
    int l = max / 0x10 + ((max % 0x10) ? 1 : 0);
    printf ("l = %d\n",l);
    int i = 0;
    int w = l - i > 1 ? 0x10 : max;
    const unsigned char* ptr = buf;
    for (; i < l; ++i,w = l - i > 1 ? 0x10 : max - 0x10 * i)
    {
        dump_line(ptr, w, i);
        ptr += w;
    }
}


int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        printf ("Usage: %s FileName, number.\n", argv[0]);
        exit(1);
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd != -1)
    {
        int s = atoi(argv[2]);
        unsigned char* buf = malloc(s+1);
        memset(buf, 0, s+1);
        read(fd, buf, s);
        dump_buffer(buf, s);
    }
    return 0;
}

Hex Dump In Many Programming Languages

See also: ArraySumInManyProgrammingLanguages, CounterInManyProgrammingLanguages, DotProductInManyProgrammingLanguages, WardNumberInManyProgrammingLanguages, NinetyNineBottlesOfBeerOnTheWall, ProgrammingChrestomathy

 

Please use spaces rather than tabs in any code samples below (ConvertSpacesToTabsNotForCode).

 

A new ProgrammingChrestomathy challenge: hexdump a file, given a starting and ending offset in the file, and number of bytes to display per outline line. Newly recommended is to show the current offset at the start of each output line.

 

SchemeLanguage: (alternative versions, or critiques of this one, would be welcome) [Note: I've removed many of the comments from my original version; ironically, it is now much clearer and easier to read.]

 

 ;; (hex-dump) => #t OR identifier
 ;; To use: 
 ;;	start the Scheme interpreter
 ;;	(load 'hexdump)
 ;;	(hexdump)
 ;;
 ;;  Special Instructions:
 ;;	The filename must be enclosed in double quotes.	
 (define (hexdump)
	(let ((file	(begin (display "Filename: ") (read)))
	 (start  (begin (display "Start: ")(read)))
	 (finish (begin (display "End: ") (read)))
	 (width  (begin (display "Bytes per Line: ") (read))))
	(dump-file file width start finish)))

 

(define (dump-file filename line-length start finish) (cond ((not (string? filename)) 'err-no-filename) ((> 0 start) 'err-negative-start) ((>= start finish) 'err-startpoint-after-endpoint) (else (let ((src (open-input-file filename)) (width (if (> line-length 0) ; default line width to 16 line-length 16)) (size (- finish start))) ; total size of dump in bytes (if (not (port? src)) ; check for valid port 'err-no-file (if (seek src start) (display-dump src width size) 'err-file-length-exceeded)))))) ; file ends before reaching start

 

(define (seek src entrypoint) (let find ((count entrypoint)) (cond ((>= 0 count) #t) ((eof-object? (read-char src)) ; if file ends before reaching entrypoint, (close-input-port src) ; close the file and return false #f) (else (find (- count 1))))))

 

;; (display-dump port integer integer) => #t OR identifier ;; Print out a hexdump ;; ;; The function accumulate-lines is used to collect ;; a group of lines up to blocksize in length to be printed ;; together as a single string. The purpose for this is to minimize ;; the number of calls to (display), a very high-overhead function. (define (display-dump src width size) (let ((blocksize 64) (height (/ size width))) (do ((blockcount height (- blockcount blocksize)) ; decrement by blocksize each pass (dumpsize size (- dumpsize (* blocksize width)))) ((or (<= blockcount 0) (eof-object? (peek-char src))) (close-input-port src) #t) (display (accumulate-lines src width blocksize dumpsize)))))

 

(define (accumulate-lines src width blocksize dumpsize) (let accumulate ((count blocksize) (tailing dumpsize)) (if (or (< count 0) ; if we've reached the end of the block, (< tailing 0)) ; or the end of the dump region, "" ; return an empty string ; else, read a new line of data; if it is at the end of the region, ; only get the remaining part of the line, otherwise get a full line (let ((line (read-dump-line src (min tailing width)))) (if (null? line) ; if we've reached the end of the file, "" ; return an empty string ; otherwise, continue accumulating lines (string-append (format-line line width) (accumulate (- count 1) (- tailing width))))))))

 

(define (read-dump-line src width) (let read-data ((count width)) (if (>= 0 count) '() (let ((next (read-char src))) (if (eof-object? next) '() (cons next (read-data (- count 1))))))))

 

 

(define (format-line dump width) (string-append (format-dump-line dump) (pad-string 3 (- width (length dump))) ": " (format-values-line dump) line-marker))

 

(define line-marker (make-string 1 #\newline))

 

(define (pad-string pad-size padding) (if (< 0 padding) (make-string (* pad-size padding) #\space) ""))

 

 

(define (format-values-line dump) (list->string (map char-value dump)))

 

 

(define (format-dump-line dump) (string-list-concat (map char->hex-string dump) " "))

 

(define (string-list-concat string-list separator) (let cat ((stringlist string-list)) (cond ((null? stringlist) "") ((list? stringlist) (string-append (car stringlist) separator (cat (cdr stringlist)))) (else stringlist))))

 

(define (char->hex-string c) (let* ((charval (char->integer c)) (hexval (number->string charval 16))) (if (> 16 charval) (string-append "0" hexval) hexval)))

 

 

(define (char-value c) (if (char-printable? c) c #\.))

 

(define (char-printable? c) (and (char<? #\space c) (char<? c #\~)))

 

A sample printout, using it's own source file:

 

 > (hexdump)
 Filename: "hexdump.scm"
 Start: 128
 End: 256
 Bytes per Line: 16
 20 53 70 65 63 69 61 6c 20 49 6e 73 74 72 75 63 : .Special.Instruc
 74 69 6f 6e 73 3a 0d 0a 3b 3b 20 20 20 20 54 68 : tions:..;;....Th
 65 20 66 69 6c 65 6e 61 6d 65 20 6d 75 73 74 20 : e.filename.must.
 62 65 20 65 6e 63 6c 6f 73 65 64 20 69 6e 20 64 : be.enclosed.in.d
 6f 75 62 6c 65 20 71 75 6f 74 65 73 2e 20 20 20 : ouble.quotes....
 20 0d 0a 0d 0a 28 64 65 66 69 6e 65 20 28 68 65 : .....(define.(he
 78 64 75 6d 70 29 0d 0a 20 20 28 6c 65 74 20 28 : xdump)....(let.(
 28 66 69 6c 65 20 20 20 28 62 65 67 69 6e 20 28 : (file...(begin.(
 #t

HaskellLanguage:

 

 import System
 import IO
 import Data.Char

 

hexdump start end width = unlines . map hexify . addLineNumber . splitGroups . takeRange where takeRange = take (end - start) . drop start splitGroups [] = [] splitGroups xs = ys : splitGroups zs where (ys,zs) = splitAt width xs in addLineNumber = zip $ map (pad . show) $ iterate (+ width) start where pad str = replicate (6 - length str) '0' ++ str hexify (num, char) = num ++ ": " ++ (unwords $ map toHexDigit char) ++ ": " ++ map literal char toHexDigit = stringify . (`divMod` 16) . ord where stringify (x,y) = [intToDigit x, intToDigit y] literal c = if isPrint c then c else '.'

 

withFile file func = readFile file >>= putStrLn . func main = do args <- getArgs let intArg = read . (args !!) in withFile (args !! 0) $ hexdump (intArg 1) (intArg 2) (intArg 3)

 

Output:

 

 debian:/home/jdtang/vm/spike# ./hexdump hexdump.hs 0 150 15
 000000: 69 6d 70 6f 72 74 20 53 79 73 74 65 6d 0a 69: import System.i
 000015: 6d 70 6f 72 74 20 49 4f 0a 69 6d 70 6f 72 74: mport IO.import
 000030: 20 44 61 74 61 2e 43 68 61 72 0a 0a 68 65 78:  Data.Char..hex
 000045: 64 75 6d 70 20 73 74 61 72 74 20 65 6e 64 20: dump start end
 000060: 77 69 64 74 68 20 3d 20 75 6e 6c 69 6e 65 73: width = unlines
 000075: 20 2e 20 6d 61 70 20 68 65 78 69 66 79 20 2e:  . map hexify .
 000090: 20 61 64 64 4c 69 6e 65 4e 75 6d 62 65 72 20:  addLineNumber
 000105: 2e 20 73 70 6c 69 74 47 72 6f 75 70 73 20 2e: . splitGroups .
 000120: 20 74 61 6b 65 52 61 6e 67 65 0a 20 20 20 20:  takeRange.
 000135: 77 68 65 72 65 20 74 61 6b 65 52 61 6e 67 65: where takeRange

 

-- JonathanTang

 


 

VisualBasicNine

 

The following direct/imperative-style program does the trick

 

 Imports System
 Imports System.IO

 

Module InManyLanguages Public Sub HexDump(ByVal filename As String, _ ByVal startOffset As Integer, _ ByVal endOffset As Integer, _ ByVal width As Integer) Using fs = New FileStream(filename, FileMode.Open, FileAccess.Read) Console.WriteLine("Filename: {0}", filename) Console.WriteLine("Start: {0}", startOffset) Console.WriteLine("End: {0}", endOffset) Console.WriteLine("Bytes per line: {0}", width) If (fs.Length > startOffset) Then fs.Position = startOffset Dim buffer(width - 1) As Byte Dim bytesRead = 0 Dim maxBytes = endOffset - startOffset Dim done = False While Not done Dim bytesToRead = IIf(maxBytes - bytesRead < width, maxBytes - bytesRead, width) Dim cb = fs.Read(buffer, 0, bytesToRead) bytesRead += cb done = cb < width Or bytesRead = maxBytes

 

For index As Integer = 0 To cb - 1 Console.Write("{0:X2} ", buffer(index)) Next For index As Integer = cb To width - 1 Console.Write(" ") Next Console.Write(": ") For index As Integer = 0 To cb - 1 Dim b = buffer(index) Console.Write(IIf(b > 32 And b < 126, ChrW(b), ".")) Next Console.WriteLine() End While End If End Using End Sub End Module

 

Which prints out the following:

 

 Filename: InManyLanguages.vb
 Start: 128
 End: 256
 Bytes per line: 16
 41 73 20 49 6E 74 65 67 65 72 2C 20 42 79 56 61 : As.Integer,.ByVa
 6C 20 65 6E 64 4F 66 66 73 65 74 20 41 73 20 49 : l.endOffset.As.I
 6E 74 65 67 65 72 2C 20 42 79 56 61 6C 20 77 69 : nteger,.ByVal.wi
 64 74 68 20 41 73 20 49 6E 74 65 67 65 72 29 0D : dth.As.Integer).
 0A 20 20 20 20 20 20 20 20 55 73 69 6E 67 20 66 : .........Using.f
 73 20 3D 20 4E 65 77 20 46 69 6C 65 53 74 72 65 : s.=.New.FileStre
 61 6D 28 66 69 6C 65 6E 61 6D 65 2C 20 46 69 6C : am(filename,.Fil
 65 4D 6F 64 65 2E 4F 70 65 6E 2C 20 46 69 6C 65 : eMode.Open,.File

 

The imperative printing code within the loop could be replaced by this functional LINQ expression:

 

 Dim s = String.Join(" ", (From index In Enumerable.Range(0, cb) _
         Select buffer(index).ToString?("X2")).Concat( _
         Enumerable.Repeat("  ", width - cb)).ToArray()) _
         & " : " & _
         New String((From index In Enumerable.Range(0, cb) _
         Let b = buffer(index) _
         Select CType(IIf(b > 32 And b < 126, ChrW(b), "."c), Char)).ToArray())
 Console.WriteLine?(s)

 

This isn't necessarily more maintainable or readable, but it proves the point.

 


PythonLanguage:

 

 import sys

 

def hexdump( chars, sep, width ): while chars: line = chars[:width] chars = chars[width:] line = line.ljust( width, '\000' ) print "%s%s%s" % ( sep.join( "%02x" % ord(c) for c in line ), sep, quotechars( line ))

 

def quotechars( chars ): return ''.join( ['.', c][c.isalnum()] for c in chars )

 

def file_section( name, start, end ): contents = open( name, "rb" ).read() return contents[start:end]

 

if __name__ == '__main__': hexdump( file_section( sys.argv[1], int( sys.argv[2] ), int( sys.argv[3] )), ' ', int( sys.argv[4] ))

 

Run output:

 

 $ python hexdump.py hexdump.py 0 60 16
 0a 69 6d 70 6f 72 74 20 73 79 73 0a 0a 64 65 66 .import.sys..def
 20 68 65 78 64 75 6d 70 28 20 63 68 61 72 73 2c .hexdump..chars.
 20 73 65 70 2c 20 77 69 64 74 68 20 29 3a 0a 20 .sep..width.....
 20 20 77 68 69 6c 65 20 63 68 61 72 00 00 00 00 ..while.char....

 

Should this one lose points for reading the whole file into memory?

 

I don't think so. After all, memory-mapped files are a very powerful paradigm. There have been operating systems that only offered that option, and their proponents were very disdainful of the other approach.

 

Long ago on systems with tiny RAM, this might have been crippling, since one couldn't view large files, but these days with big RAM and huge swap areas, it seems unlikely to be a pragmatic issue.

 

There is something to be said for stream-based solutions as part of a DataFlowProgramming system, but that doesn't seem a big deal on this particular page.

 

The above version is buggy - it doesn't print brackets, etc, properly. Here's an idiomatic version that handles large files. It's less terse than it could be (but that's because it's more readable than it could be ;-)

 

 import sys

 

def hexdump(fname, start, end, width): for line in get_lines(fname, int(start), int(end), int(width)): nums = ["%02x" % ord(c) for c in line] txt = [fixchar(c) for c in line] print " ".join(nums), "".join(txt)

 

def fixchar(char): from string import printable if char not in printable[:-5]: return "." return char

 

def get_lines(fname, start, end, width): f = open(fname, "rb") f.seek(start) chunk = f.read(end-start) gap = width - (len(chunk) % width) chunk += gap * '\000' while chunk: yield chunk[:width] chunk = chunk[width:]

 

if __name__ == '__main__': try: hexdump(*sys.argv[1:5]) except TypeError?: hexdump("hexdump.py", 0, 100, 16)

 


 

RubyLanguage:

 

 def hexdump(filename, start = 0, finish = nil, width = 16)
   ascii = ''
   counter = 0
   print '%06x  ' % start
   File.open(filename).each_byte do |c|
     if counter >= start
       print '%02x ' % c
       ascii << (c.between?(32, 126) ? c : ?.)
       if ascii.length >= width
         puts ascii 
         ascii = ''
         print '%06x  ' % (counter + 1)
       end
     end
     throw :done if finish && finish <= counter
     counter += 1
   end rescue :done
   puts '   ' * (width - ascii.length) + ascii
 end

 

if $0 == __FILE__ if ARGV.empty? hexdump $0 else filename = ARGV.shift hexdump filename, *(ARGV.map {|arg| arg.to_i }) end end

 

% hexdump.rb hexdump.rb 0 58 16 000000 64 65 66 20 68 65 78 64 75 6d 70 28 66 69 6c 65 def hexdump(file 000010 6e 61 6d 65 2c 20 73 74 61 72 74 20 3d 20 30 2c name, start = 0, 000020 20 66 69 6e 69 73 68 20 3d 20 6e 69 6c 2c 20 77 finish = nil, w 000030 69 64 74 68 20 3d 20 31 36 29 0a idth = 16).

 


 

CeeLanguage or maybe CeePlusPlus - Not actually CeeLanguage - There are CeePlusPlus specifics used(like declaring variables in the middle of a function for readability). Why do people think CeePlusPlus means one must use class. Sigh. I guess the CeeLanguage/CeePlusPlus are popular enough that people must post follow ups with their own visions. An OO solution is probably overkill, but it wouldn't be unreasonable to ask that something purporting to be a CeePlusPlus example use things like the CeePlusPlus standard library, rather than the CeeLanguage standard library... (or if you're going to use the CeeLanguage library, at least use the standard CeePlusPlus headers for it [that's #include <cstdio> and #include <cstdlib>, rather than <stdio.h> and <stdlib.h>])... - Fair enough, we'll call it a draw - if just to avoid a LanguageHolyWar?.

 

I posted one of the straight C language solutions below, which should tell you that I'm not a rabid foaming-at-the-mouth OO bigot, however I was the one that said it was C, not C++, and I stand by that. Aside from utterly trivial issues, it could compile with a C compiler. And yes, I would in fact want to see use of classes or templates, although as someone said above, at least using native C++ facilities for IO would put you in a more defensible position.

 

Re: "follow up with their own visions" - there's room for multiple solutions in each language. I like to go for very terse solutions because I think that's a virtue in itself. Others obviously have somewhat different philosophies that they like to illustrate. -- dm

 

Not that I'm disagreeing with you - I do have strong CeeLanguage influences, but just to explain where I was coming from - Can you imagine the ThreadMess if someone really did post a "true" CeePlusPlus example?

 

You could guarantee a HolyWar over:
  • Whether writing to a generic stream is better than writing to cout.
  • Whether it was encapsulated enough or had had too many "Get/Put" methods.
  • What design patterns it followed incorrectly or *gasp*didn't use at all.
  • The missing or excessive use of interfaces/templates/overloading.

     

I don't think this little exercises risk major HolyWars?. Anyway,
  • generic streams make it more reusable, so that is more desirable. I'd agree, but I would also consider the decision subjective
    • On this page, yes, but in a real project, inevitably you want to direct something to cerr, not just cout, so I wouldn't call it subjective in general.

       

  • Get/Put has of course been discussed, and I don't think there's any question: it's a Smell, but not necessarily wrong.
  • Failing to use a known DesignPattern - now you're being silly. If you can cite any instances of that on the entire web by an apparently sane person, I'd be interested in seeing it. True. . . and I do love being silly
  • Operator overloading ideally should be used only on numeric operators to implement new numeric types, except that Stroustrup brain damage means that it also should be used to extend standard operators like << and >>.
  • Redefining other operators like ()[] etc usually should be done if and only if one is redefining essentially all operators transparently in order to play tricks like reference-counted garbage collection, and otherwise it's a major smell.
  • Templates should be used any time there is a need to parameterize an algorithm by type. They should not be used to play other kinds of tricks -- except that by now there are a lot of C++ specific patterns on using them in bizarre ways in order to work around limitations of the language, so there are some peculiar cases where you've got a point.
  • What do you mean by missing/overuse of interfaces?
    • ''I meant whether the objects should have abstract interfaces, be wrapped in COM layers, etc - but I guess that goes back to the design patterns.
    • Ah.

       

Even in straight C, getting to the elegant minimum is fiercely debated. Perhaps the posts of "true" CeePlusPlus examples are so large that they could be the cause of the denied access problems we've been having. ;)
  • Perhaps. I've been thinking about doing one, and have refrained so far because of the mental image. ;-)

     

 /*
  This entry contains certain functionality the others may not.
  • The stop-start span does not need to be equal to width
  • The command-line is checked for missing arguments
 */
 #include <stdio.h>
 #include <stdlib.h>

 

void hexdump(unsigned char *buffer, unsigned long index, unsigned long width) { unsigned long i; for (i=0;i<index;i++) { printf("%02x ",buffer[i]); } for (unsigned long spacer=index;spacer<width;spacer++) printf(" "); printf(": "); for (i=0;i<index;i++) { if (buffer[i] < 32) printf("."); else printf("%c",buffer[i]); } printf("\n"); }

 

int hexdump_file(FILE *infile,unsigned long start, unsigned long stop, unsigned long width) { char ch; unsigned long f_index=0; unsigned long bb_index=0; unsigned char *byte_buffer = malloc(width); if (byte_buffer == NULL) { printf("Could not allocate memory for byte_buffer\n"); return -1; } while (!feof(infile)) { ch = getc(infile); if ((f_index >= start)&&(f_index <= stop)) { byte_buffer[bb_index] = ch; bb_index++; } if (bb_index >= width) { hexdump(byte_buffer,bb_index,width); bb_index=0; } f_index++; } if (bb_index) hexdump(byte_buffer,bb_index,width); fclose(infile); free(byte_buffer); return 0; }

 

int main(int argc, char *argv[]) { if (argc != 5) { printf("Usage: hexdump <infile> <start> <end> <width>\n"); return 0; } FILE *infile=fopen(argv[1],"rb"); if (infile==(FILE *)NULL) { printf("Error opening input file %s\n",argv[1]); return 0; } printf("Filename: \"%s\"\n", argv[1]); printf("Start : %lu\n", atoi(argv[2])); printf("End : %lu\n", atoi(argv[3])); printf("Bytes per Line: %lu\n",atoi(argv[4])); int result = hexdump_file(infile,atoi(argv[2]),atoi(argv[3]),atoi(argv[4])); return 0; }

 

Run output:
 $ hexdump hexdump.c 0 100 16
 Filename: "hexdump.c"
 Start : 0
 End	: 100
 Bytes per Line: 16
 2f 2a 0d 0a 20 20 54 68 69 73 20 65 6e 74 72 79 : /*..  This entry
 20 63 6f 6e 74 61 69 6e 73 20 63 65 72 74 61 69 :  contains certai
 6e 20 66 75 6e 63 74 69 6f 6e 61 6c 69 74 79 20 : n functionality
 74 68 65 20 6f 74 68 65 72 73 20 6d 61 79 20 6e : the others may n
 6f 74 2e 0d 0a 20 20 20 20 2a 20 54 68 65 20 73 : ot...	* The s
 74 6f 70 2d 73 74 61 72 74 20 73 70 61 6e 20 64 : top-start span d
 6f 65 73 20 6e				  : oes n

 

-- LayneThomas

 


 

ForthLanguage

 

DUMP is in the ANS Tools word set, e.g. GNU Forth:

 

 HERE 32 DUMP
   27790: 80 C0 0C CC  C3 33 0C 30 - CC 33 0C C3  30 C3 0C CC  .....3.0.3..0...
   277A0: CC CC CC CC  CC CC CC CC - C3 0C CC CC  CC CC CC CC  ................

 

Here's an implementation that meets the new spec. (Usage: "start end width HEXDUMP filename")

 

 0 value fileid
 : .pos ( ud -- )
   <# # # # #S #> type ." : " ;
 : .hex ( addr len -- )
   over + swap do I c@ S>D <# # # #> type space loop ;
 : .spacer ( width read -- )
   - 3 * spaces ." : " ;
 : .chars ( addr len -- )
   over + swap do I c@ dup 127 bl within if drop [char] . then emit loop ;
 : (hd) ( start end width "filename" -- )
   R/O OPEN-FILE throw  to fileid
   >R over - R> rot ( count width start )
   S>D fileid REPOSITION-FILE throw ( count width )
   begin  cr fileid FILE-POSITION throw .pos
          2dup min pad swap fileid READ-FILE throw ?dup
   while  pad over .hex	2dup .spacer  pad over .chars
          over =          \ end of file?
   while  tuck - tuck 0>  \ end of range?
   while repeat then then 2drop
   fileid CLOSE-FILE throw cr ;
 : hexdump ( start end width "filename" -- )
   bl parse  base @ >R hex  ['] (hd) catch  R> base !  throw ;

 

Sample output, testing EOF handling and partial last line :

 

 930 950 8 hexdump hexdump.f 
 03A2: 61 73 65 20 21 20 20 74 : ase !  t
 03AA: 68 72 6F 77 20 3B 0A    : hrow ;.

 

I believe that <# #> is underrated even in the Forth world, and certainly outside it. People seem to think it's pretty much equivalent to formatting in C or Lisp, but I think it is instead an example of an entire paradigm of programming that is mostly unexplored, although alluded to by ChuckMoore.

 

Could you expand on this? Forth pictured numerics are simple, but not exactly the friendliest of constructs (you are specifying digits in reverse order, for example). Here is GNU Forth's simple implementation (though I added digit>char for clarity).

 

 variable holdptr
 variable holdend
 create holdbuf 64 allot
 here constant holdbuf-end
 : <# ( ud -- ud ) holdbuf-end dup holdptr ! holdend ! ;
 : hold ( c -- ) -1 holdptr +! holdptr @  dup holdbuf u< -17 and throw  c! ;
 : digit>char ( 0-35 -- [0-9A-Z] ) 9 over < IF 7 + THEN [char] 0 + ;
 : # ( ud -- ud/base ) base @ ud/mod rot digit>char hold ;
 : #s ( ud -- 0d ) BEGIN # 2dup or 0= UNTIL ;
 : #> ( ud -- addr len ) 2drop holdptr @ holdend @ over - ;

 

Yeah, and many in the Forth community actually regard them as obsolete. But I'm not talking about their literal use for formatting, I'm talking about them as an example of a different way of approaching doing programming. Recall anything that ChuckMoore has said about how he approaches problem solving in Forth?

 

Here's one example of what Moore said:

 

"The whole point of Forth was that you didn't write programs in Forth you wrote vocabularies in Forth. When you devised an application you wrote a hundred words or so that discussed the application and you used those hundred words to write a one line definition to solve the application. It is not easy to find those hundred words, but they exist, they always exist."

 

"I wish I knew what to tell you that would lead you to write good Forth. I can demonstrate. I have demonstrated in the past, ad nauseam, applications where I can reduce the amount of code by 90% percent and in some cases 99%. It can be done, but in a case by case basis. The general principle still eludes me." http://www.ultratechnology.com/1xforth.htm

 

He doesn't know how to explain how he goes about finding just the right set of words. I think <# #> serves as an illustration, not of the ultimate way to format, but of the paradigm he exercises unconsciously. -- dm

 


 

ColorForth

 

The wiki doesn't do color, so these are the mappings:
  • red words are definitions
  • green words and numbers are normal
  • white words are (commentary)
  • yellow words are executed [immediately]
  • purple words are in-source variables (var x). The numbers shown after them are their actual contents.
  • dark green hex constants are normal here
  • color implementation and semantics are described on http://www.colorforth.com/parsed.html
    • I see your new links on ColorForth, but I still don't see a pointer to a list of ColorForth words. Previously I would have assumed that I could have guessed some of them, like "dup", since it seems vaguely similar to constructs in other versions of Forth. :-) But since you assure me ColorForth is so very different (and since Moore has such disdain for ANS Forth), I'd better not take that on faith. Maybe "dup" stands for "DuPont?".
      • Many are the same (dup, drop, swap, @, !, +, etc.), but many others are different from their Forth counterparts (or, if, -, ;), some words are renamed (push, pop, minus), and many more are unique to the system (show, screen, keyboard). It is dangerous to assume too much when reading ColorForth source.

         

        • Huh? I don't want to assume anything; I keep asking if you know where there is a list of ColorForth word definitions. If the answer is "no, you have to infer it from reading their implementation in x86 assembly or in terms of the 27 forth chip primitives", ok, then that's the way it is, and if I ever truly need to figure it out, I'll do that. But it seems more likely to me that there is in fact a word list in English somewhere, and it's merely in an obscure location. Also, if the answer is "I don't know offhand, and I don't have time to search it out for you", of course that's fine too. :-) I just thought it was potentially interesting, and oddly hard to find. Then again, the Forth world is strangely allergic to anything resembling standards, it seems, ANS Forth notwithstanding, since so many seem to despise it, not just Moore.

           

Other Strange Things:
  • (I didn't find the definition for h., so it's probably in the assembly kernel.)
    • At first I thought perhaps h. was doing a 4 bit to hex conversion per call, but every two calls are followed by cr, so I must be misunderstanding the context.

       

  • or drop if: unlike Forth, if uses the processor zero flag, not the top stack item. or sets the flag for if, drop clears the unneeded numeric result. Since or is an exclusive-or (don't ask), the clause is equivalent to <> if in standard Forth.
  • dump falls through into r and u falls through into +xy to avoid a function call (a common AssemblyLanguage idiom)
  • dump, r, and cmp set the background task (straight dump or diff of two regions). The displayed dump is active and displays changes to memory in real time.
  • dump, byte, u, d, and +xy update the address to show.
  • The dumps don't show ASCII because ColorForth uses a Huffman encoding for text. The format per line for dump is "<hex word> <hex address>" and for cmp it is "<hex x> <hex y> <[red] offset>" with the x and y base addresses listed at the top. The offset is printed red if the contents differ.

     

{block 32}

 

 (dump) var x (200000) var y (201200)
 one dup @ h. space dup h. cr ;
 lines for one -1 + next drop ;
 dump x !
 r show black screen x @ 15 + 16 text lines keyboard ;
 it @ + @ dup h. space ;
 lines for white i [x] it i [y] it or drop if red then i . cr -next ;
 cmp show blue screen text 19 lines red [x] @ h. space [y] @ h. keyboard ;
 u 16
 +xy dup [x] +! [y] +! ;
 d -16 +xy ;
 ati F4100000 (ff7fc000) or
 byte 4 / dump ;
 fix for 0 over ! 1 + next ; [dump]

 

{block 33} this is the shadow block, commentary on the previous block. An editor command flips between a block and its shadow.

 

 (does not say empty, compiles on top of application)
 x -a (current address)
 one a-a (line of display)
 lines an
 dump a (background task continually displays memory)
 u (increment address)
 d (decrement)
 ati (address of AGP graphic registers)
 byte (a byte address dump)
 fix an-a (test word)

 

I just looked (for way too long) and couldn't find a list of the standard (well - common) colorforth words anywhere at all (Moore has an annoying page purporting to help programmers that basically says "try it! I'll add documentation some year. Maybe"). Where can I find one?

 

ColorForth is primarily ChuckMoore's personal programming system. As a working engineer, he hasn't had much time or inclination to document his evolving experiment. He admits communication is one of his weaknesses. Others have written better documented variants that keep the flavor of ColorForth; I'll try to add more references to the ColorForth page. This particular snippet came from some webified source on http://www.merlintec.com/download/color.html

 

You've already largely translated it out of colorforth; may as well continue until it's 100% one of the regular forths. After all, there seem to be, what, 20 regular colorforth users, and 1 colorforth expert, in the world? :-)

 

Actually, I haven't changed it a whit; it's a direct mapping from color to markup. ColorForth has very different semantics from standard Forth. They're even further apart than SchemeLanguage is from CommonLisp.

 

Magenta variables are interesting, since they're basically a bizarre way of doing persistence.

 


 

JavaLanguage:

 

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;

 

public class HexDump {

 

public static void main(String[] args) throws IOException { String inputFileName = args[0]; int start = Integer.parseInt(args[1]); int end = Integer.parseInt(args[2]); int width = Integer.parseInt(args[3]); byte[] bytes = read(inputFileName, start, end); for (int index = 0; index < bytes.length; index += width) { printHex(bytes, index, width); printAscii(bytes, index, width); } }

 

private static byte[] read(String inputFileName, int start, int end) throws FileNotFoundException, IOException { File theFile = new File(inputFileName); FileInputStream input = new FileInputStream(theFile); int skipped = 0; while (skipped < start) { skipped += input.skip(start - skipped); } int length = (int) (Math.min(end, theFile.length()) - start); byte[] bytes = new byte[length]; int bytesRead = 0; while (bytesRead < bytes.length) { bytesRead = input.read(bytes, bytesRead, bytes.length - bytesRead); if (bytesRead == -1) { break; } } return bytes; }

 

private static void printHex(byte[] bytes, int offset, int width) { for (int index = 0; index < width; index++) { if (index + offset < bytes.length) { System.out.printf("%02x ", bytes[index + offset]); } else { System.out.print(" "); } } }

 

private static void printAscii(byte[] bytes, int index, int width) throws UnsupportedEncodingException? { if (index < bytes.length) { width = Math.min(width, bytes.length - index); System.out.println( ":" + new String(bytes, index, width, "UTF-8").replaceAll("\r\n", " ").replaceAll( "\n", " ")); } else { System.out.println(); } } }

 

 

 Arguments: HexDump.java 0 100 16

 

69 6d 70 6f 72 74 20 6a 61 76 61 2e 69 6f 2e 46 :import java.io.F 69 6c 65 3b 0d 0a 69 6d 70 6f 72 74 20 6a 61 76 :ile; import jav 61 2e 69 6f 2e 46 69 6c 65 49 6e 70 75 74 53 74 :a.io.FileInputSt 72 65 61 6d 3b 0d 0a 69 6d 70 6f 72 74 20 6a 61 :ream; import ja 76 61 2e 69 6f 2e 46 69 6c 65 4e 6f 74 46 6f 75 :va.io.FileNotFou 6e 64 45 78 63 65 70 74 69 6f 6e 3b 0d 0a 69 6d :ndException; im 70 6f 72 74 :port
And another JavaLanguage example factored differently:

 

 package main;
 import java.io.*;
 import java.util.*;

 

public class Dump {

 

public static void main(String[] args) { byte[] bytes = new ContentFile?(args[0]).content().getBytes(); int start = integer(args[1]); bytes = slice(bytes, start, integer(args[2])); int perLine = integer(args[3]); List<Line> chunks = chunks(start, bytes ,perLine); for(Line x : chunks) x.writeLn(perLine); }

 

private static String leftPadded(String s, int num) {while (s.length() < num) s = " " + s; return s; } private static int integer(String s) { return Integer.parseInt(s); }

 

private static List<Line> chunks(int start, byte[] bytes, int perLine) { List<Line> result = new ArrayList<Line>(); while (bytes.length > 0) { result.add(new Line(start, slice(bytes, 0, perLine))); bytes = slice(bytes, perLine, bytes.length); start += perLine; } return result; }

 

private static byte[] slice(byte[] bytes, int start, int end) { if (end > bytes.length) end = bytes.length; if (end - start <= 0) return new byte[0]; byte[] result = new byte[end - start]; System.arraycopy(bytes, start, result, 0, end - start); return result; }

 

static class ContentFile? { File file_; public ContentFile?(String name) { file_ = new File(name); } public String content() { try { StringBuffer buf = new StringBuffer(); BufferedReader? in = new BufferedReader?(new FileReader?(file_)); for(String s = in.readLine(); s != null; s = in.readLine()) buf.append(s); return buf.toString(); } catch (Exception e) { throw new RuntimeException("couldn't get content", e); } } }

 

static class Line { final int start; final byte[] bytes; public Line(int start, byte[] bytes) { this.bytes = bytes; this.start = start; } public void writeLn(int perLine) { System.out.println("O: " + leftPadded("" + start, 4) + " " + bytesText(perLine) + ":" + text()); } private String text() { String result = ""; for(int i = 0; i < bytes.length; i++) { char c = (char) bytes[i]; result += Character.isLetterOrDigit(c) || Character.isSpace(c) ? c : '.'; } return result; } private String bytesText(int perLine) { String result = ""; for(int i = 0; i < bytes.length; i++) { result += String.format("%02x ", bytes[i]); } while (perLine-- > bytes.length) result += " "; return result; } } }

 

C:\eclipserc3\eclipse\workspace\hex\bin>java -cp . main.Dump ..\src\main\Dump.java 0 100 16 O: 00 70 61 63 6b 61 67 65 20 6d 61 69 6e 3b 69 6d 70 :package main.imp O: 16 6f 72 74 20 6a 61 76 61 2e 69 6f 2e 2a 3b 69 6d :ort java.io...im O: 32 70 6f 72 74 20 6a 61 76 61 2e 75 74 69 6c 2e 2a :port java.util.. O: 48 3b 70 75 62 6c 69 63 20 63 6c 61 73 73 20 44 75 :.public class Du O: 64 6d 70 20 7b 20 20 20 20 70 75 62 6c 69 63 20 73 :mp . public s O: 80 74 61 74 69 63 20 76 6f 69 64 20 6d 61 69 6e 28 :tatic void main. O: 96 53 74 72 69 :Stri

 

C:\eclipserc3\eclipse\workspace\hex\bin>

 

I wanted to factor the chunks method into the Line class by creating a line with the entire source byte array, and then calling a method subdivide, which would call a method line() and a method rest(). Think that would pretty up a bit more, but hey - real work calls.

 

-- JeffBay

 


 

BourneShell:
 #!/bin/sh

 

if [ $# -ne 4 ] then echo "Usage: $0 <source> <start> <end> <width>" 1>&2 exit 2 fi

 

SOURCE="$1" START="$2" END="$3" WIDTH="$4" LENGTH=`expr $END - $START`

 

hexdump -s $START -n $LENGTH -e $WIDTH'/1 "%02X "' -e '" : "' -e '"%_p"' -e '"\n"' $SOURCE ...HaHaOnlySerious!

 


 

PerlLanguage:
 #!/usr/bin/perl -w
 use Fcntl qw(:seek);
 use List::Util qw(min);
 use strict;
 if(@ARGV != 4) {
	print STDERR "Usage: $0 <source> <start> <end> <width>\n";
	exit(2);
 }
 my ($source, $start, $end, $width) = @ARGV;
 open(SOURCE, "<", $source) or die("Can't open $source");
 seek(SOURCE, $start, SEEK_SET) or die("Can't seek to $start");
 my $block;
 for(my $left = $end - $start; $left > 0 && read(SOURCE, $block, min($width, $left)); $left -= length($block)) {
	my $hex = join(" ", (map { sprintf("%02X", ord($_)) } split(//, $block)));
	$hex .= '	' x ($width - length($block));
	my $plain = join("", (map { printable($_) ? $_ : "." } split(//, $block)));
	print "$hex: $plain\n";
 }
 sub printable { my $o = ord($_[0]); return $o >= 33 && $o <= 126; }

 


 

CeeLanguage:
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 int min(int a, int b) { return a < b ? a : b; }
 int main(int argc, char *argv[]) {
	FILE *fp;
	int i, start, end, width, left, count;
	unsigned char *buf;
	if(argc != 5) {
	fprintf(stderr, "Usage: %s <source> <start> <end> <width>\n", argv[0]);
	exit(2);
	}
	if((fp = fopen(argv[1], "r")) == NULL) { perror("fopen"); exit(2); }
	start = atoi(argv[2]);
	end	= atoi(argv[3]);
	width = atoi(argv[4]);
	left = end - start;
	if((buf = malloc(width)) == NULL) {
	fprintf(stderr, "No memory?!?\n");
	exit(2);
	}
	if(fseek(fp, start, SEEK_SET)) { perror("fseek"); exit(2); }
	while(left > 0 && (count = fread(buf, 1, min(left, width), fp))) {
	for(i = 0; i < count; i++)	 printf("%02X ", buf[i]);
	for(i = 0; i < width - count; i++) printf("	");
	printf(": ");
	for(i = 0; i < count; i++) printf("%c", isgraph(buf[i]) ? buf[i] : '.');
	printf("\n");
	left -= count;
	}
      free(buf);
	return 0;
 }

 

Hmm, this doesn't appear to be totally debugged:

 

 ./a.out a.out 0 50 16

 

7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00 : .ELF............ 02 00 03 00 01 00 00 00 FFFFFFB0 FFFFFF84 04 08 34 00 00 00 : ............4... FFFFFFC0 23 00 00 00 00 00 00 34 00 20 00 06 00 28 00 : .#......4.....(. 23 00 : #.

 

You need to add a mask: printf("%02X ", buf[i] & 0xff);

 

Minor detail.

 

Ah. Signed chars are sign extended into signed ints... Fixed by making the chars unsigned.

 

You beat me to the edit, and with a shorter solution, it looks like.

 


 

CeeLanguage:

 

After great struggle, I transformed the natural but verbose approach of C closures, reflection, and metaclasses into something a little more terse:

 

 #include	<assert.h>
 #include	<stdio.h>
 #include	<ctype.h>
 void displayHexRecord(char *data, int count, int record_length) {
	int	i;
	for (i=0; i < count; i++) printf("%02x ", data[i] & 0xff);
	for (; i < record_length; i++) printf("	");
	printf(": ");
	for (i=0; i < count; i++) {
		if (isgraph(data[i])) putchar(data[i]);
		else		  putchar('.');
	}
	putchar('\n');
 }
 void hexDump(FILE *f, long cur_addr, long end, int bytes_per_line) {
	int	record_length, read_count;
	char	*data;
	assert(data = (char*) calloc(sizeof(data[0]), bytes_per_line));
	if (-1 == fseek(f, cur_addr, SEEK_SET)) perror("fseek");
	while (1) {
		printf("%05x  ", cur_addr);
		read_count = end - cur_addr;
		if (read_count > bytes_per_line) read_count = bytes_per_line;
		record_length = fread(data, sizeof(char), read_count, f);
		displayHexRecord(data, record_length, bytes_per_line);
		if (record_length < bytes_per_line) break;
		if (end <= (cur_addr += record_length)) break;
	}
	free(data);
 }

 

Output for hexDump(f, 0, 55, 16) of its own executable:

 

 00000  7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 : .ELF............
 00010  02 00 03 00 01 00 00 00 f0 83 04 08 34 00 00 00 : ............4...
 00020  44 35 00 00 00 00 00 00 34 00 20 00 06 00 28 00 : D5......4.....(.
 00030  23 00 20 00 06 00 00				: #......

 

There's something strange about powerful languages having longer examples than powerless languages...

 

where's the main() and why no checking of command line arguments - it wouldn't produce a full executable without some other harness, so. . . where is it?

 

It's a library function. Requiring a full program violates the spirit of these ProgrammingChrestomathy pages; someone has already commented on searching and hex editing, and why exactly are we leaving out the GUI? :-P That's all foolishness, so I created a library routine. I also left out e.g. printing the filename, because that violates principles outlined in TheArtOfUnixProgramming; that, too, is an inappropriate requirement, although obviously it only adds one line of code to a program in any language.

 

Printing the filename wasn't a requirement. The Scheme program prompts the user for the filename, etc., from the console because command-line arguments aren't part of the standard language. Apparently, the person who wrote the C++ example (which, as someone else pointed it, is actually plain C) mistook the Scheme program prompting the user for input as printing the arguments out.
  • Ah, I think I see. Incidentally, I was the one who pointed out the C++ was actually C, so it wasn't "someone else", it was just "someone". :-)

     

Anyone who fails to grok these points should immediately go download and examine the (huge) GNU hello world distribution, and ponder how it is that hello world got to be so big.

 

(It got to be that big because it's an example of how to release packages The GNU Way and they wanted to give an example of every bell, whistle, and gong you can accomplish with standard GNU packaging tools. In other words, it got that big because it's demonstrating a nontrivial solution set to a highly nontrivial problem.)

 

But if you just want to try it, a trivial main will do it:
      #include <stdio.h>

 

int main(void) { FILE *f = fopen("a.out", "r"); if (f == NULL) { perror("a.out"); exit(1); } hexDump(f, 0, 55, 16); return 0; } Parse command line args? What's that got to do with hex dump Chrestomathy? How about a test harness? I never saw a Marketing Requirements doc, either. Where's our sysadmin plan? Budget? HR-required ergonomic classes? Shall I go on? :-)

 

It's not really terse if it's missing a main, and puts printfs on the same line as the for loop. How many corners can you cut and claim "but it's terse!"? :->

 

(Bug fixed, thanks.)

 

How many corners can you cut and claim "but it's terse"? That's a good question. But you know, textbooks and magazine columns that illustrate algorithms almost always leave out error checks, because they get in the way of seeing the algorithm that is the true point.

 

The question of whether to sometimes put trivial bodies on the same line as the control construct, or whether to always put them on separate lines, is purely stylistic, and obviously HolyWars have been fought about that. Some people claim that it makes a huge difference in readability, but I personally don't think it matters too much either way in most cases.

 

The other CeeLanguage author did similar things - while finding an even more terse algorithm kernel as well.

 

Well, every programmer thinks their coding style is the "pragmatic optimum", so I won't argue that. The whole Chrestomathy idea should apply to samples within the same language too. I guess the "real-world" origins of this page have changed, but now it's kind of a loose problem. How does the file get read? What does it link with? Does it require a host environment? Unfortunately "hex dumping" isn't really an algorithmic problem. . . so . . I guess I have no point than to say I'm not sure what the criteria for a good example is anymore.

 

It's not a huge algorithmic problem; none of these chrestomathy pages are. But it's an interesting small algorithmic problem. I still need to finish examining why it is that the other author got a smaller kernel than I did, so as to keep in mind any points I observe for future use. Look for general principles whenever possible, I say.

 

P.S. I thought I already addressed the issue of the missing main(), but to be somewhat more explicit: it would be all but trivially the same for all of these chrestomathy pages. It's not interesting, it's not illustrative, it doesn't vary, but it certainly adds clutter. And also, come now, I added one after someone implied they didn't want to be bothered writing one to test my code...but I still don't see that it is part of the chrestomathy proper. A proper Hello World illustrates half of how to do it in C, and an example of any use of getopts illustrates the other half.

 


 

JavaScript: (JScript running under WSH - Windows Script Host - uses ActiveXscripting for its file API)

 

note about ActiveXscripting: one way or another you don't have direct access to file I/O (Input Output) inside an ECMAscript implementation (meaning the core language definition), the file I/O depends on the environment where the language is implemented. Here in WSH, Microsoft has provided external COM objects for file I/O access, in another environment like FESI ( http://www.lugrin.ch/fesi/fioext.html ) you could use another way to access file I/O with the File constructor, anyway the code provided can be easyly ported to different ECMAscript environment just edit the readFile method in HexDump.

 

usage: cscript hexdump.js <filename> <start> <end> <width>

 

limitation: this work only for ASCII files

 

'''if you want to edit the code edit it correctly, or make a fork with your own version, if I have formated or given a particular structure to this script there are good reasons for that:'''
  • code readability
  • code reuse
if you're not happy with it make your own entry and dont't garbage my own.

 

/* Function: trace */
 function trace( /*String*/ txt )
    {
    WScript.Echo( txt.toString() );
    }

 

/* Singleton: Application */ Application = {}; Application.arguments = [];

 

Application.getArguments = function()
    {
    var args, i;
    args = WScript.Arguments;

 

for( i=0; i<args.length; i++ ) { this.arguments.push( args(i) ); }

 

return this.arguments; }

 

/* Class: HexDump */
 HexDump = function( /*String*/ source, /*Int*/ start, /*Int*/ end, /*Int*/ width )
    {
    this.source = source;
    this.start  = start;
    this.end    = end;
    this.width  = width;

 

this.file = ""; this.hex = []; this.dump = [];

 

this.parse(); }

 

HexDump.prototype.toString = function() { var data, CRLF; data = ""; CRLF = "\r\n";

 

data += "HexDump: \""+this.source+"\" ["+this.start+"-"+this.end+"] ("+this.width+")" + CRLF; data += this.dump.join( CRLF ); return data; }

 

HexDump.prototype.readFile = function( /*String*/ filepath ) { var FSO, file, data; FSO = new ActiveXObject( "Scripting.FileSystemObject?" ); file = FSO.OpenTextFile?( filepath, /*FileIOmode.reading*/ 1, false, /*FileFormat?.ascii*/ false ); data = file.ReadAll?(); file.Close();

 

return data; }

 

HexDump.prototype.formatText = function( /*String*/ txt ) { txt = txt.split( " " ).join( "." ); txt = txt.split( "\n" ).join( " " ); txt = txt.split( "\r" ).join( " " ); return txt; }

 

HexDump.prototype.charToHex = function( /*Char*/ c ) { var hex; hex = c.charCodeAt( 0 ).toString( 16 ); return( (hex.length==2)? hex: "0"+hex ); }

 

HexDump.prototype.parse = function() { var i, j, char, hexLine, fileLine;

 

this.file = this.readFile( this.source ); this.file = this.file.substring( this.start, this.end );

 

for( i=0; i<this.file.length; i++ ) { char = this.file.charAt( i ); this.hex.push( this.charToHex( char ) ); }

 

j = 0; hexLine = ""; fileLine = ""; while( j < this.file.length ) { hexLine = this.hex.slice( j, j+this.width+1 ); while( hexLine.length <= this.width ) { hexLine.push( " " ); } hexLine = hexLine.join( " " );

 

fileLine = this.file.slice( j, j+this.width+1 ); fileLine = this.formatText( fileLine );

 

this.dump.push( hexLine + " : " + fileLine );

 

j += this.width; }

 

}

 

/* MAIN ENTRY POINT */
 Application.main = function()
   {
    var args, dump;

 

args = this.getArguments();

 

if( args[3] == null ) { args[3] = 16; //default value }

 

if( args[2] == null ) { args[2] = 100; //default value }

 

if( args[1] == null ) { args[1] = 0; //default value }

 

if( args[0] == null ) { trace( "usage:\r\ncscript hexdump.js <filename> <start> <end> <width>" ); return; }

 

dump = new HexDump( args[0], parseInt(args[1]), parseInt(args[2]), parseInt(args[3]) );

 

trace( dump ); }

 

Application.main();

 

output for cscript hexdump.js hexdump.js 0 100 16

 

 HexDump: "hexdump.js" [0-100] (16)
 2f 2a 20 46 75 6e 63 74 69 6f 6e 3a 20 74 72 61 63 : /*.Function:.trac
 63 65 0d 0a 2a 2f 0d 0a 74 72 61 63 65 20 3d 20 66 : ce  */  trace.=.f
 66 75 6e 63 74 69 6f 6e 28 20 2f 2a 53 74 72 69 6e : function(./*Strin
 6e 67 2a 2f 20 74 78 74 20 20 29 0d 0a 20 20 20 20 : ng*/.txt..)  ....
 20 7b 0d 0a 20 20 20 20 57 53 63 72 69 70 74 2e 45 : .{  ....WScript.E
 45 63 68 6f 28 20 74 78 74 2e 74 6f 53 74 72 69 6e : Echo(.txt.toStrin
 6e 67 28 29					: ng()

 

(My 1st participation in this great wiki ;) -- ZwetanKjukov [sideNote: I can provide also a oneliner totally obscured and unreadable but I don't really see any usefulness to that.]

 

Neither does anyone else. Layne did that to be sarcastic because I wrote something like:
   if (x) y();
where he thought style required always writing
   if (x) {
      y();
   }
That's the only reason that he posted his unreadable oneliners; they were an editorial comment.

 


 

Here's a CommonLisp version without pretense of efficiency or command-line parsing:

 

 (defun dump-line (in start width count)
   (let ((bytes (loop for i below count
                      for byte = (read-byte in nil nil)
                      while byte collect byte)))
 	(format t "~&~8,'0X ~{~2,'0X ~}~A | ~{~C~}"
             start
             bytes
             (make-string (* 3 (- width (length bytes))) 
                          :initial-element #\Space)
             (loop for byte in bytes
                   for char = (code-char byte)
                   when (graphic-char-p char) collect char
                   else collect #\.))))

 

(defun dump (file start end width) (with-open-file (in file :direction :input :element-type '(unsigned-byte 8)) (when (file-position in start) (loop with true-end = (min end (file-length in)) for i from start below true-end by width do (dump-line in i width (min width (- true-end i)))))) (format t "~%"))

 

 

Sample output, generated by evaluating (dump "dump.lisp" 3 37 16):

 

 00000003 66 75 6E 20 64 75 6D 70 2D 6C 69 6E 65 20 28 69  | fun dump-line (i
 00000013 6E 20 73 74 61 72 74 20 63 6F 75 6E 74 20 77 69  | n start count wi
 00000023 64 74                                            | dt

 

-- DanMuller

 

Standard CommonLisp doesn't provide a way to get command-line arguments. Guess I'll have to add interactive argument acquisition.

 


 

[Historical note: The original version of this page stated that other chrestomathy pages "do not demonstrate the use of the languages in a way that is applicable to real-world programming", with implication that this one would. It also placed several requirements on the program which were out of place in this sort of demonstration (that it had to be a stand-alone program, that read user input from the either the command line or the console, etc.). This naturally enough garnered several scornful remarks. When the problem definition was refactored, the origianl version and the subsequent discussion were moved here for reference sake.]

 

While there are several ProgrammingChrestomathy pages already, most of them deal with a task that is either trivial (printimg "Hello, World!" or the lyrics to "99 Bottles of Beer", encrypting a cleartext in RotThirteen), or of primarily theoretical or abstract importance (the 8-Queens Problem, calculating dot-products). While these are all valuable in their own way, one could argue that they do not demonstrate the use of the languages in a way that is applicable to real-world programming.

 

To this end, I am hereby proposing a new chrestomathy challenge: a program in the format

 

hexdump source start end width

 

which reads the file source and prints a HexDump of the bytes in the file from start to end, formatted to show width bytes separated by space, followed by a separator, then followed by the ASCII values of the displayed bytes, in the order they exist in the file. The program include the code to read these arguments, either from the command line, or from user input. Either console or GUI programs are acceptable, though portable code is preferred where possible. The code should be testable by independent third parties. Any instructions needed for testing the code (target environment, special requirements) should be listed in the program comments.

 

I wouldn't say that this problem is any more or less trivial than the other chrestomathy (great word!) pages. Depends what your real world is, I guess.

 

There are a host of "real world" factors ignored here too:
  • Reusability - There is no incentive here to make the code reus

继续阅读

Effective STL 学习笔记 39 ~ 41

2014年9月03日 00:14

Effective STL 学习笔记 39 ~ 41

继续阅读

Http PipeLining

2014年9月03日 00:14

Http PipeLining

继续阅读

Effective STL 学习笔记 Item 38 : Design functor classes for pass-by-value

严格来讲, C 和 C++ 都不支持将函数作为参数,真正作为参数的,实际上是 pass-by-value 的函数指针, 作为函数指针的模拟, Functor 作为参数时候也是按值传递的,有些 STL 的实现中,将 Functor 作为引用传递甚至不能通过编译。这也就要求我们将 Functor 设计得当,以满足传值:

  1. Functor 应该尽量小:
    否则的话, Functor 的拷贝将会变得昂贵。
  2. Functor 应该为单态 (monomorphic): —— 不用使用虚函数
    如果算法需要的是基类 Functor,但传入的是子类的话,拷贝过程中可能会引起 切片问题

但,离开了多态与继承的 C++ ,也就失去了后面的 ++, 变成了 C 。这里有一个办法可以将庞大的多态 Functor 分解成符合需求的 Functor,即将数据以及多态的部分封装成单独的类,然后在 Functor 中存储该类的指针,换句话说: Bridge Mode。

例如下面的这个 BPFC (Big Polymoriphic Functor Class):

template <typename T>
class BPFC :  public  unary_functor<T, void>()
{
public:
    // XXX: This is virtual function, may cause slicing issue.
    virtual void operator()(const T& val) const;

private:
    Widget w;
    int    x;
};

我们可以将其中的数据和多态部分拆分,形成下面的 Functor:

template <typename T>
class BPFCImpl
{
public:
    virtual void operator(const T& val) const;
    virtual ~BPFCImpl();
private:
    Widget w;
    int x;
};

template <typename T>
class BPFC : public unary_functor<T,void>
{
public:
    void operator()(const T& val) const
    {
        pImpl->operator(val); // forward it to BPFCImpl
    }
private:
    BPFCImpl<T>* pImpl;
};

切记: Make functors small and monomorphic!

Effective STL 学习笔记 Item 34: 了解哪些算法希望输入有序数据

有些个算法对有序的和无序的数据都能应用,但多数情况下,他们在输入数据有序时才最有用。

下列算法要求输入数据必须有序:

  • binary_search, upper_bound, lower_bound, equal_range

    这些算法均使用了二分查找 (binary_search) 以期达到 logarithmic-time lookups,要求数据必须有序。

  • set_union, set_intersection, set_difference, set_symmeteric_difference

    这些算法要求保证时间复杂度为线性,所以输入数据必须有序。

  • merge, inplace_merge

    这两个算法内部使用 merge sort 来完成运算且要求线性时间,也要求输入必须有序。

  • includes

    也要求线性时间,输入须有序。

下列算法要求对数据顺序无强制要求,但最好有序:

  • unique
  • unique_copy

STL 允许我们自己定义排序算法,为了让程序正确的运行,我们必须保证排序时候所用的比较算法和上述的算法中使用的比较算法相同,例如下面的例子中:

vector<int> v;
//...  putting values to this vector.

sort(v.begin(), v.end(), greater<int>); // Sorted in descending order.

bool a4Exists =
        binary_search(v.begin(), v.end(), 5); // Assumes vector sorted in ascending range

试图从降序排列的数据中按照升序算法去找一个数据,很有可能会出问题,而下面的表达式中,在 binary_search 中指定比较算法为排序算法中所使用的比较算法,则没有问题:

bool ret = binary_search(v.begin(), v.end(), 5, greater<int>());

下面是完成的测试代码:

#include <vector>
#include <algorithm>
#include <iostream>

using namespace std;

#define N       100

#define show(s,m) cout<< m ;if (s) { cout << " 5 exists!" << endl; } else { cout << " 5  not existed!" << endl; }


int main(int argc, char *argv[])
{
    srand(time(NULL));
    vector<int> v(N);
    for (int i = 0; i < N; ++i)
    {
        v[i] = i;
    }

    random_shuffle(v.begin(), v.end());

    sort(v.begin(), v.end(), greater<int>());

    bool ret = binary_search(v.begin(), v.end(), 5);
    show(ret, "Searching in different compare function:");

    ret=binary_search(v.begin(), v.end(), 5, greater<int>());
    show(ret, "Searching in same compare function:");
    return 0;
}

下面是输出:

Welcome to the Emacs shell

~/Documents/MetaWebBlog/org $ ~/tmp $ ./test
Searching in different compare function:5  not existed!
Searching in same compare function:5 exists!

Effective STL 学习笔记 32 ~ 33

2014年9月03日 00:13

Effective STL 学习笔记 32 ~ 33

继续阅读