-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathxml.bat
executable file
·59 lines (50 loc) · 2.27 KB
/
xml.bat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
::#! 2>/dev/null || echo "
@echo off
call scala -savecompiled %~f0 %*
goto :eof
" >/dev/null
#!/bin/sh
# Polyglot launcher: the lines above form the Windows batch branch, wrapped in
# a quoted `echo` so that sh skips over them. The echo output is discarded to
# /dev/null (the previous target, //null, is not the null device).
# Everything from the first line down to the `::!#` marker is the script
# header; the Scala code starts after it.
exec scala -savecompiled "$0" "$@"
::!#
// Optional restriction on the name of the top-level elements to extract.
//   no argument   -> None: any element name is accepted
//   one argument  -> Some(name): only elements with exactly this name are extracted
//   anything else -> print the usage text and terminate
val rootElemName: Option[String] = args match {
  case Array()     => None
  case Array(name) => Some(name)
  case _ =>
    // The closing `|` keeps the text ending in a single newline after stripMargin.
    println("""
      | Keep only well-formed XML
      | Usage:
      | xml [rootElementName]
      | Examples:
      | echo "a<b>c</b>d" | ./xml.bat
      | <b>c</b>
      |""".stripMargin)
    // sys.exit() replaces the deprecated Predef.exit; the exit status stays 0 as before.
    sys.exit()
}
import scala.util.parsing.combinator._
import scala.util.parsing.input._
import scala.io.Source
import scala.collection.immutable.PagedSeq
/** Scans character input for well-formed XML elements and prints each one found.
  *
  * The grammar is a small subset of XML: an element is either self-closing
  * (`<name attrs/>`) or an open tag followed by content (text and nested
  * elements) and a close tag with the same name. Attribute values must be
  * quoted. There are no rules for comments, CDATA sections or processing
  * instructions, so input containing them is simply not matched.
  */
object P extends RegexParsers {
  // Whitespace is matched explicitly by the grammar so it is reproduced verbatim in the output.
  override val skipWhitespace = false

  /** First character of an attribute name. */
  def nameStartChar = "[A-Z_a-z:]".r

  /** Any further character of an attribute name. */
  def nameChar = "[A-Z_a-z:.0-9-]".r

  /** An attribute name: one start character followed by any number of name characters. */
  def attributeName = nameStartChar ~ rep(nameChar)

  /** A single- or double-quoted attribute value; the result keeps the surrounding quotes. */
  def attributeValue = "[\"']".r >> { quote => ("[^" + quote + "]*").r ~ quote ^^ { case content ~ closing => quote + content + closing } }

  /** `name=value`, returned as the matched text. */
  def attribute = attributeName ~ "=" ~ attributeValue ^^ { case n1 ~ n ~ eq ~ value => n1 + n.mkString + eq + value }

  /** An element name.
    *
    * @param restrict when defined, only this exact name is accepted; otherwise any run of
    *                 characters that cannot end or interrupt a tag is accepted
    */
  def elementName(restrict: Option[String]): Parser[String] = restrict match {
    case Some(n) => literal(n)
    // '/' and '<' are excluded so that "<b/>" yields the name "b" (not "b/") and a name
    // can never run into the next tag.
    case None => "[^?/<>\\s]+".r
  }

  /** A self-closing tag `<name attrs/>`; yields the name and the raw attribute text. */
  def elemShort(restrict: Option[String]): Parser[(String, String)] =
    // `<~ "/"` drops the slash from the result, so the pattern below really sees name ~ attributes.
    ("<" ~> elementName(restrict) ~ rep("\\s+".r | attribute) <~ "/" <~ ">") ^^ { case name ~ attributes => (name, attributes.mkString) }

  /** An opening tag `<name attrs>`; yields the name and the raw attribute text. */
  def elemOpen(restrict: Option[String]): Parser[(String, String)] =
    ("<" ~> elementName(restrict) ~ rep("\\s+".r | attribute) <~ ">") ^^ { case name ~ attributes => (name, attributes.mkString) }

  /** The closing tag for `elemName`. */
  def elemClose(elemName: String) = "<" ~ "/" ~ elemName ~ ">"

  /** Element content: any mix of text and nested (unrestricted) elements, as text. */
  def elemContent: Parser[String] = rep("[^<]+".r | elem()) ^^ { _.mkString }

  /** A complete element, returned as its textual form.
    *
    * @param restrict optional restriction on the element's own name (nested elements are unrestricted)
    */
  def elem(restrict: Option[String] = None): Parser[String] = {
    val selfClosing = elemShort(restrict) ^^ { case (name, attrs) => "<" + name + attrs + "/>" }
    val withBody = elemOpen(restrict) >> { case (name, attrs) =>
      (elemContent <~ elemClose(name)) ^^ { content => "<" + name + attrs + ">" + content + "</" + name + ">" }
    }
    selfClosing | withBody
  }

  /** Prints, one per line, every element found in `input`.
    *
    * After a match, scanning continues right behind the matched element. When no element
    * starts at the current position, scanning restarts one character further on, so
    * well-formed elements nested inside an unclosed outer tag are still found.
    *
    * @param rootElement optional restriction on the name of the elements to extract
    * @param input       the characters to scan
    */
  @scala.annotation.tailrec
  def apply(rootElement: Option[String], input: Reader[Char]): Unit =
    parse(elem(rootElement), input) match {
      case Success(result, next) =>
        println(result)
        if (!next.atEnd) apply(rootElement, next)
      case _ =>
        // Restart from the character after the attempted start, not from the failure position.
        if (!input.atEnd) apply(rootElement, input.rest)
    }
}
// Entry point: wrap standard input in a character reader and print every
// element the parser finds, optionally restricted to the requested name.
val stdinChars = PagedSeq.fromSource(Source.stdin)
P(rootElemName, new PagedSeqReader(stdinChars))