#!/bin/sh
#
# Check if paragraph IDs are unique, print next available ID.
#
# Usage: pass the file to scan as the first argument.
#
# A paragraph ID is any text matching "{<lowercase letters><digits>:<text>}",
# for example "{doc:12}". The part before the first colon is the document
# prefix; the part after it is read as the paragraph number.
#
# Output (stdout):
#   - one "Error:" line for every ID whose prefix differs from the first
#     prefix seen in the file,
#   - one "Error:" line for every ID that was already used on an earlier
#     occurrence,
#   - a final "Info:" line naming the next free ID (highest number + 1).
#
# Exit status: 1 if the argument is not a regular file, otherwise the exit
# status of awk. Reported "Error:" lines do not change the exit status.

fn=${1-}

if [ ! -f "$fn" ]; then
  # Diagnostics belong on stderr so they never mix with the report.
  echo "file not found" >&2
  exit 1
fi

# The redirection target is quoted: bash rejects an unquoted expansion that
# contains whitespace or glob characters with "ambiguous redirect".
awk '
{
  # Consume the line left to right, one ID per iteration.
  rest = $0
  while (match(rest, "[{][a-z]*[0-9]*:[^}]*[}]")) {
    # Strip the surrounding braces.
    id = substr(rest, RSTART + 1, RLENGTH - 2)

    # A[1] is the document prefix, A[2] the paragraph number.
    split(id, A, ":")
    curr = int(A[2])
    if (curr > last)
      last = curr

    # The first prefix seen becomes the reference for the whole file.
    if (doc == "")
      doc = A[1]
    else if (doc != A[1])
      print "Error: invalid doc id:", id, "in line", NR, "(should be", doc, ")"

    # SEEN maps each ID to the line number of its first occurrence.
    if (id in SEEN)
      print "Error: invalid doc id:", id, "in line", NR, "reused (first used in line", SEEN[id], ")"
    else
      SEEN[id] = NR

    # Continue scanning after the ID just handled.
    rest = substr(rest, RSTART + RLENGTH)
  }
}

END {
  # last is still unset (treated as 0) when the file contains no IDs.
  print "Info: next available ID is: {" doc ":" (last + 1) "}"
}
' < "$fn"