Day 6: the getline function and misc. AWK code snippets ... the getline function ... worth reading: http://awk.freeshell.org/AllAboutGetline (comp.lang.awk FAQ) getline provides an alternative to the implicit pattern-action input loop => getline syntax doesn't match the other AWK functions / statements in BEGIN / END getline allows reading sources not tied to FILENAME in main body getline can read from FILENAME, and/or other sources getline returns 1 if record is present, 0 if EOF, -1 on read error note: if used as part of an expression, getline is evaluated first i.e. while (getline < "foo" > 0) == while((getline < "foo") > 0) how getline is used decides what variable(s) are set: # Table A-10 Getline Function # ======================================= # expression variables set # --------------------------------------- # getline $0, NF, NR, FNR (default call) # getline var var, NR, FNR # getline < file $0, NF # getline var < file var # cmd | getline $0, NF, NR # cmd | getline var var, NR => best to use 'getline var' ; changing FS &/or using split() can help Some examples: # ex. collect workshop participants from pcom log before processing # (assumes each participant posted at least one comment; # since this is only for obfuscation it works fine) # # BEGIN { # ... # RawLog = "pcom-raw.log" # FS = "[[\\]]" # while (getline < RawLog > 0) # if ($2 !~ /[[:space:]]/ && $0 ~ /^[[][[:alpha:]]/ && !($2 in Usrs)) # Usrs[$2] # <= array containing unique UIDs # close (RawLog) # FS = " " # } # # where # - calling getline in while() loop reads RawLog to EOF => 0 returned # - BEGIN{} + close(RawLog) => state of FILENAME, $0, etc. no problem # - if we did NOT close(RawLog) it would still be at EOF in main body # - need to reset FS back to default (" ") to avoid later weirdness.. # # ex. using getline to test file readability prior to actual reading: # # # fstate.awk - attempt to discern readability of file(s) # BEGIN { # Fmt = "%20s => %s\n" # for (i=1 ; i < ARGC ; i++) { # # note: the test order matters!
# if (getline < ARGV[i] > 0) { # close (ARGV[i]) # printf Fmt, ARGV[i], "readable w/ content" # } else if (getline < ARGV[i] < 0) { # printf Fmt, ARGV[i], "unreadable" # } else if (getline < ARGV[i] == 0) { # close (ARGV[i]) # printf Fmt, ARGV[i], "readable & empty" # } # } # } # # $ awk -f fstate.awk test.regular test.empty test.unreadable # test.regular => readable w/ content # test.empty => readable & empty # test.unreadable => unreadable # # where # - above should run without producing any errors # - limitation: can't test writability => use test(1) # - no close() after 2nd test => file never opened # although AWK lacks an eval() function one can be constructed: # ex. creating a quasi-eval() function: # # $ cat eval.awk # function eval(code, arr, _cmd, _i) { # _cmd = "awk 'BEGIN{" code "}'" # while (_cmd | getline arr[++_i] > 0) ; # close (_cmd) # return _i # } # BEGIN { # printf "enter awk oneliner: " # getline Str < "/dev/tty" # N = eval(Str, Results) # for(i=1 ; i < N ; i++) # print Results[i] # } getline within BEGIN block allows conventional procedural style: # ex. menu-based command selector: # # #! /usr/bin/mawk -Wp,i,e # # run_cmds.awk # BEGIN { # # ANSI escapes: # Clr = "\033[H\033[2J" ; Grn = "\033[1;32m" ; Nrm = "\033[0m" # N = split ("date time uptime", Tags) # M = split ("date '+%x':date '+%X':uptime", Cmds,":") # while (1) { # print Clr, "\n Run commands:\n" # for(i=1; i<=N; i++) # printf "%4d)%4s%s\n", i, "", Grn Tags[i] Nrm # printf "\n enter # ; 'q' quits: " # getline Aws < "/dev/tty" # if (Aws ~ /^[qQ]$/) break # if (Aws in Cmds) { # Cmds[Aws] |getline Str # close (Cmds[Aws]) # print "\n =>", Grn Str Nrm # } else # print "\n =>", Grn "invalid choice.." Nrm # system ("sleep 2") # } # print "" # } # # note: test terminal capabilities before using ANSI escapes # ... misc. AWK code snippets ...
insertion sort (The AWK Programming Language, 1st ed., p154): # # A = called array[keys] (unsorted) ; n = size of A # function isort(A, n, i, j, tmp) { # for (i = 2; i <= n; i++) # for (j = i; j > 1 && A[j-1] > A[j]; j--) { # tmp = A[j-1] ; A[j-1] = A[j] ; A[j] = tmp # } # } # # where # - above sorts A ascending and does not remove duplicate entries # - for descending simply print sorted array in reverse # - to skip duplicates add test, i.e. 'if (A[i] != A[i-1]) ...' # a POSIX getchar for AWK - similar to bash 'read -n1': # # returns _char up to length 2: # function getchar( _cmd, _char) { # system ("stty -icanon") # put TTY in "raw" mode # _cmd = "dd bs=6 count=1 2>/dev/null" # _cmd | getline _char # close (_cmd) # system ("stty icanon") # put TTY in "normal" mode # return _char # } # # where # - "stty -icanon" => TTY in "raw" mode # - "dd bs=6 count=1" => reads up to 6 bytes from stdin ; # using bs=6 seems to work well for most unicode # - "stty icanon" => TTY in "normal" mode # combine above w/ ANSI escapes for nicer interactive apps POSIXly obtaining time in secs since Unix epoch: # # returns seconds since epoch: # function t_epoch() { # srand() ; return srand() # } # # where # - srand() obtains seed from system time by default # - 2nd call to srand() returns seed # End of Workshop => you are all AWK experts!