-- The grammar for a subset of XML, written using https://cs.lmu.edu/~ray/notes/xmlgrammar/ as a reference.
class markup grammar
implicit import ideallibrarypatterns
implicit import idealruntimepatterns
-- Character classifier; name start and name char call is letter on it.
character handler the character handler
-- Lookup tables keyed by the name that appears in the markup text.
-- They are filled by add elements / add attributes / add entities and
-- consulted by match start element, make attribute and make entity 2nd.
dictionary[string, element id] element ids
dictionary[string, attribute id] attribute ids
dictionary[string, special text] entities
-- Matchers over a character stream; the second type argument is the value
-- each matcher produces. Declared var; in the visible code entity ref,
-- attribute, empty element and content are assigned inside document().
-- NOTE(review): assignments to the remaining matchers are not visible in
-- this view -- confirm where they are initialized.
var matcher[character, text element] document matcher
var matcher[character, special text] entity ref
var matcher[character, string] quot attr value
var matcher[character, string] apos attr value
var matcher[character, attribute fragment] attribute value in quot
var matcher[character, attribute fragment] attribute value in apos
var matcher[character, attribute state] attribute
var option matcher[character, text element] element
var matcher[character, text element] empty element
var matcher[character, text fragment] content
-- Receiver of report error calls made by the match/make callbacks below.
var markup parser parser
-- Registers every element id in new element ids under its short name.
-- Asserts that the grammar is not completed (is completed() is defined
-- outside this view) and that no element with the same short name has
-- already been registered.
public void add elements(readonly collection[element id] new element ids)
assert !is completed()
for (the element id : new element idselements)
-- A duplicate short name trips the assertion instead of overwriting.
assert !element idscontains key(the element idshort name)
element idsput(the element idshort name, the element id)
-- Registers every attribute id in new attribute ids under its short name.
-- Asserts that the grammar is not completed and that no attribute with the
-- same short name has already been registered.
public void add attributes(readonly collection[attribute id] new attribute ids)
assert !is completed()
for (the attribute id : new attribute idselements)
-- A duplicate short name trips the assertion instead of overwriting.
assert !attribute idscontains key(the attribute idshort name)
attribute idsput(the attribute idshort name, the attribute id)
-- Registers every special text in new entities under its name so that
-- make entity 2nd can resolve it. Asserts that the grammar is not completed
-- and that no entity with the same name has already been registered.
public void add entities(readonly collection[special text] new entities)
assert !is completed()
for (the entity : new entitieselements)
-- A duplicate entity name trips the assertion instead of overwriting.
assert !entitiescontains key(the entityname)
entitiesput(the entityname, the entity)
-- Predicate for the first character of a name: true when the character
-- handler reports c as a letter, or when c is '_' or ':'.
protected boolean name start(character c) pure
return the character handleris letter(c) || c == '_' || c == ':'
-- Predicate for a non-initial character of a name: true when the character
-- handler reports c as a letter, or when c is one of '.', '-', '_', ':'.
-- NOTE(review): digits are accepted only if is letter covers them; the XML
-- NameChar production includes digits, so names such as h1 may fail to
-- match -- confirm against the character handler implementation.
protected boolean name char(character c) pure
return the character handleris letter(c) || c == '.' || c == '-' || c == '_' || c == ':'
-- True for every character except '<' and '&'.
protected boolean content char(character c) pure
return !(c == '<' || c == '&')
-- True for every character except '<', '&' and the apostrophe.
protected boolean content not apos(character c) pure
return !(c == '<' || c == '&' || c == '\'')
-- True for every character except '<', '&' and the double quote.
protected boolean content not quot(character c) pure
return !(c == '<' || c == '&' || c == '"')
-- Shorthand that constructs a sequence pattern over characters from the
-- given list of patterns (presumably matched one after another -- the
-- sequence pattern type is defined outside this view).
pattern[character] sequence(readonly list[pattern[character]] patterns list) pure
return sequence pattern[character] • new(patterns list)
-- Shorthand that constructs an option pattern over characters from the
-- given list of patterns (presumably a choice between them -- the option
-- pattern type is defined outside this view).
option pattern[character] option(readonly list[pattern[character]] patterns list) pure
return option pattern[character] • new(patterns list)
-- Converts a readonly list of attribute states into an immutable list by
-- taking its elements and casting the result with !>.
immutable list[attribute state] cast attributes(readonly list[attribute state] attributes) pure
return attributeselements !> immutable list[attribute state]
-- Turns a list of matched characters into a string: takes a frozen copy of
-- the list and casts it to string with !>.
string as string procedure(readonly list[character] the character list) pure
return the character listfrozen copy() !> string
-- Callback that builds a text element from the parts matched for a start
-- tag or an empty element. Index 1 is read as the element name and index 2
-- as the attribute list, which lines up with the
-- [lt, name, attributes, space opt, ...] sequences built in document().
-- The resulting base element is created with missing instance as content.
text element match start element(readonly list[any value] the list) pure
element name : the list[1] !> string
attributes : the list[2] !> immutable list[attribute state]
element id : element idsget(element name)
if element id is null
parserreport error("Unrecognized element name: " ++ element name)
-- NOTE(review): in the visible lines element id is still passed on below
-- after the error is reported; match text element special-cases
-- text library ERROR ELEMENT, so a substitution may be intended -- confirm.
-- NOTE(review): attributes dictionary has no declaration in the visible
-- lines -- confirm where it is created.
for (attribute : attributes)
attributes dictionaryput(attributeid, attributevalue)
return base element • new(element id, attributes dictionary, missinginstance)
-- Callback that combines a start tag, its content and an end tag name into
-- one element. Index 0 is the start-tag element, index 1 the content
-- fragment and index 2 the end-tag name. A differing start and end name is
-- reported through the parser, except when the start tag's id is
-- text library ERROR ELEMENT. The returned base element reuses the start
-- tag's id and attributes and carries the content.
text element match text element(readonly list[any value] the list) pure
start tag : the list[0] !> text element
text content : the list[1] !> text fragment
end tag name : the list[2] !> string
start tag name : start tagget idshort name
-- Skip the mismatch report for error elements.
if start tagget id != text libraryERROR ELEMENT && start tag name != end tag name
parserreport error("Mismatched element name: start " ++ start tag name ++ ", end " ++ end tag name)
return base element • new(start tagget id, start tagattributes, text content)
-- Callback for an entity reference. Index 1 of the matched list is read as
-- the entity name, consistent with the [amp, name, semicolon] sequence in
-- document(). The name is looked up in entities and an unknown name is
-- reported through the parser.
special text make entity 2nd(readonly list[any value] the list) pure
string entity name : the list[1] !> string
entity : entitiesget(entity name)
if entity is null
parserreport error("Unrecognized entity: " ++ entity name)
-- NOTE(review): in the visible lines the lookup result is returned even
-- when it is null -- confirm whether a fallback value is intended.
return entity
-- Callback for one attribute. Index 0 of the matched list is read as the
-- attribute name, consistent with the [name, equals, attribute value]
-- sequence in document(). A name missing from attribute ids is reported
-- through the parser and yields an attribute state keyed by
-- text library ERROR ATTRIBUTE; otherwise the registered id is used.
-- NOTE(review): value has no declaration in the visible lines; presumably
-- it is the matched attribute value from the list -- confirm.
attribute state make attribute(readonly list[any value] the list) pure
string attribute name : the list[0] !> string
id : attribute idsget(attribute name)
if id is null
parserreport error("Unrecognized attribute name: " ++ attribute name)
return attribute state • new(text libraryERROR ATTRIBUTE, value)
return attribute state • new(id, value)
-- Callback that picks index 2 of the matched list as a string; in the
-- [lt, slash, name, space opt, gt] sequence in document() that is the name.
string select end tag(readonly list[any value] the list) pure => the list[2] !> string
-- Callback that picks index 1 of the matched list and casts it to text element.
text element select 2nd text element(readonly list[any value] the list) pure => the list[1] !> text element
-- Callback that joins exactly two matched text fragments with
-- text utilities join; asserts that the list has two entries.
text fragment join2(readonly list[any value] the list) pure
assert the listsize == 2
return text utilitiesjoin(the list[0] !> text fragment, the list[1] !> text fragment)
-- Builds the matchers that make up the grammar and returns result after
-- calling validate() on it.
-- NOTE(review): several operands used below (amp, name, semicolon,
-- space opt, slash, attribute value, attributes, char data opt,
-- content element, repeat or none text, result) have no definition in the
-- visible lines -- confirm against the full source.
protected matcher[character, text element] document()
-- Single-character punctuation matchers.
lt : one character('<')
gt : one character('>')
eq : one character('=')
-- Entity reference: amp, name, semicolon; resolved by make entity 2nd.
entity ref = sequence matcher[character, special text] • new([amp, name, semicolon], make entity 2nd)
-- '=' with space opt on both sides.
equals : sequence([space opt, eq, space opt])
-- Attribute: name, equals, attribute value; built by make attribute.
attribute = sequence matcher[character, attribute state] • new([name, equals, attribute value], make attribute)
-- Empty element: lt, name, attributes, space opt, slash, gt.
empty element = sequence matcher[character, text element] • new([lt, name, attributes, space opt, slash, gt], match start element)
-- Content: char data opt followed by repeated (content element, char data opt)
-- pairs; each pair and the final head/tail pair are merged with join2.
content tail : repeat or none text(sequence matcher[character, text fragment] • new([content element .> pattern[character], char data opt], join2))
content = sequence matcher[character, text fragment] • new([char data opt, content tail .> pattern[character]], join2)
-- Start tag shares match start element with empty element; it has no slash.
pattern[character] start tag : sequence matcher[character, text element] • new([lt, name, attributes, space opt, gt], match start element)
-- End tag: lt, slash, name, space opt, gt; yields the name via select end tag.
end tag : sequence matcher[character, string] • new([lt, slash, name, space opt, gt], select end tag)
-- NOTE(review): start tag and end tag are not used in the visible lines.
resultvalidate()
return result