# split.awk --- do split in awk # # Requires getopt() library function. # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 # Revised slightly, May 2014 # Rewritten September 2020 function usage( common) { common = "[-a suffix-len] [file [outname]]" printf("usage: split [-l count] %s\n", common) > "/dev/stderr" printf(" split [-b N[k|m]] %s\n", common) > "/dev/stderr" exit 1 } BEGIN { # Set defaults: Suffix_length = 2 Line_count = 1000 Byte_count = 0 Outfile = "x" parse_arguments() init_suffix_data() Output = (Outfile compute_suffix()) } function parse_arguments( i, c, l, modifier) { while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) { if (c == "a") Suffix_length = Optarg + 0 else if (c == "b") { Byte_count = Optarg + 0 Line_count = 0 l = length(Optarg) modifier = substr(Optarg, l, 1) if (modifier == "k") Byte_count *= 1024 else if (modifier == "m") Byte_count *= 1024 * 1024 } else if (c == "l") { Line_count = Optarg + 0 Byte_count = 0 } else usage() } # Clear out options for (i = 1; i < Optind; i++) ARGV[i] = "" # Check for filename if (ARGV[Optind]) { Optind++ # Check for different prefix if (ARGV[Optind]) { Outfile = ARGV[Optind] ARGV[Optind] = "" if (++Optind < ARGC) usage() } } } function compute_suffix( i, result, letters) { # Logical step 3 if (Reached_last) { printf("split: too many files!\n") > "/dev/stderr" exit 1 } else if (on_last_file()) Reached_last = 1 # fail when wrapping after 'zzz' # Logical step 1 result = "" letters = "abcdefghijklmnopqrstuvwxyz" for (i = 1; i <= Suffix_length; i++) result = result substr(letters, Suffix_ind[i], 1) # Logical step 2 for (i = Suffix_length; i >= 1; i--) { if (++Suffix_ind[i] > 26) { Suffix_ind[i] = 1 } else break } return result } function init_suffix_data( i) { for (i = 1; i <= Suffix_length; i++) Suffix_ind[i] = 1 Reached_last = 0 } function on_last_file( i, on_last) { on_last = 1 for (i = 1; i <= Suffix_length; i++) { on_last = on_last && (Suffix_ind[i] == 26) } return on_last } Line_count > 0 { if (++tcount > Line_count) { close(Output) Output = (Outfile compute_suffix()) tcount = 1 } print > Output } Byte_count > 0 { # `+ 1' is for the final newline if (tcount + length($0) + 1 > Byte_count) { # would overflow # compute leading bytes leading_bytes = Byte_count - tcount # write leading bytes printf("%s", substr($0, 1, leading_bytes)) > Output # close old file, open new file close(Output) Output = (Outfile compute_suffix()) # set up first bytes for new file $0 = substr($0, leading_bytes + 1) # trailing bytes tcount = 0 } # write full record or trailing bytes tcount += length($0) + 1 print > Output } END { close(Output) }