Golang encoding/xml: Difference between revisions
No edit summary |
|||
(15 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
go's builtin library for parsing xml.<br> | go's builtin library for parsing xml.<br> | ||
See also: [[golang encoding]], [[xml]] | See also: [[golang encoding]], [[xml]], [[golang x/net]] | ||
A more detailed introduction to go's encoding interface can be seen in [[golang encoding/json]]. | A more detailed introduction to go's encoding interface can be seen in [[golang encoding/json]]. | ||
{{ NOTE | | {{ NOTE | | ||
Golang's builtin xml library does not support | Golang's builtin xml library does not support schema validation. }} | ||
= Documentation = | = Documentation = | ||
Line 53: | Line 52: | ||
</blockquote><!-- Struct Tags --> | </blockquote><!-- Struct Tags --> | ||
= | = Serialization = | ||
<blockquote> | <blockquote> | ||
== Basics == | == Basics == | ||
Line 68: | Line 67: | ||
fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> | fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> | ||
} | } | ||
</syntaxhighlight> | |||
You can also marshal with an indent | |||
<syntaxhighlight lang="go"> | |||
xml.MarshalIndent( | |||
&user, | |||
" ", // (2) indent entire object this many spaces | |||
" ", // (4) indent-width | |||
) | |||
//| | <-- 2x spaces | |||
// <User name="will"> | |||
// <id>123</id> // <-- indented 4x spaces | |||
// </User> | |||
</syntaxhighlight> | </syntaxhighlight> | ||
</blockquote><!-- Basics --> | </blockquote><!-- Basics --> | ||
Line 83: | Line 96: | ||
func main() { | func main() { | ||
raw := `<User name="will"><id>123</id></User>` | |||
var user User | |||
xml.Unmarshal([]byte(raw), &user) | |||
fmt.Println(user) | |||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 127: | Line 141: | ||
</blockquote><!-- Non-Homogenous XML --> | </blockquote><!-- Non-Homogenous XML --> | ||
== | == Arbitrary XML == | ||
<blockquote> | <blockquote> | ||
<syntaxhighlight lang="go"> | <syntaxhighlight lang="go"> | ||
type Node struct { | |||
XMLName xml.Name | |||
Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | |||
Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> | |||
Nodes []*Node `xml:",any"` // child-nodes | |||
} | } | ||
// deserialize | |||
var parsed Node | |||
xml.Unmarshal([]byte(raw), &parsed) | |||
// re-serialize | |||
bytes, err := xml.Marshal(&parsed) | |||
</syntaxhighlight> | </syntaxhighlight> | ||
{{ expand | |||
< | | Example: Deserialize, Modify, Re-serialize an arbitrary xml object. | ||
| | |||
{{ WARNING | | |||
Do not parse HTML with an XML parser (unclosed elements like <code><nowiki><br></nowiki></code> are invalid XML). | ||
}} | |||
<syntaxhighlight lang="go"> | <syntaxhighlight lang="go"> | ||
Line 150: | Line 175: | ||
type Node struct { | type Node struct { | ||
XMLName xml.Name | XMLName xml.Name | ||
Attrs []xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | ||
Data string | Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> | ||
Nodes []Node `xml:",any"` // child-nodes | Nodes []*Node `xml:",any"` // child-nodes | ||
} | |||
func addDotHtmlToAHrefs(node *Node) { | |||
// Adds a '.html' suffix to each href in a '<a href="foo">foo</a>' | |||
if node.XMLName.Local == "a" { | |||
for _, attr := range node.Attrs { | |||
if attr.Name.Local == "href" { | |||
attr.Value = fmt.Sprint(attr.Value, ".html") | |||
} | |||
} | |||
} | |||
for _, child := range node.Nodes { | |||
addDotHtmlToAHrefs(child) | |||
} | |||
} | } | ||
Line 165: | Line 205: | ||
var parsed Node | var parsed Node | ||
xml.Unmarshal([]byte(raw), &parsed) | xml.Unmarshal([]byte(raw), &parsed) | ||
// re-marshall | // modify | ||
bytes, _ := xml. | addDotHtmlToAHrefs(&parsed) | ||
fmt.Println(string(bytes)) | |||
// re-marshall, modified | |||
bytes, _ := xml.MarshalIndent(&parsed, "", " ") | |||
fmt.Println(string(bytes)) | |||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> | ||
Outputs | |||
<syntaxhighlight lang="html5"> | |||
<html> | |||
<p> | |||
<a href="abc.html">ABC</a> | |||
</p> | |||
<blockquote> | |||
<p> | |||
<a href="def.html">DEF</a> | |||
</p> | |||
</blockquote> | |||
</html> | |||
</syntaxhighlight> | |||
}} | |||
</blockquote><!-- Arbitrary XML --> | </blockquote><!-- Arbitrary XML --> | ||
== Custom Deserializers == | |||
<blockquote> | |||
{{ TODO | | |||
finish }} | |||
<syntaxhighlight lang="go"> | |||
func (revision *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||
return nil | |||
} | |||
</syntaxhighlight> | |||
</blockquote><!-- Custom Deserializers --> | |||
</blockquote><!-- Deserialization --> | </blockquote><!-- Deserialization --> |
Latest revision as of 06:07, 10 July 2022
go's builtin library for parsing xml.
See also: golang encoding, xml, golang x/net
A more detailed introduction to go's encoding interface can be seen in golang encoding/json.
NOTE:
Golang's builtin xml library does not support schema validation.
Documentation
official docs https://pkg.go.dev/encoding/xml@go1.18.3
Tutorials
tutorialedge https://tutorialedge.net/golang/parsing-xml-with-golang/
Struct Tags
See full details here
type User struct { Name `xml:"Name"` // <User><Name>value</Name></User> Color `xml:"color,attr"` // <User color="value"></User> Skip `xml:"-"` // <User></User> }XML namespaces are supported, by providing a prefix to the xml tag.
type Html struct { XMLName xml.Name `xml:"http://www.w3.org/1999/xhtml html"` } // <html xmlns="http://www.w3.org/1999/xhtml"> // <!-- ... --> // </html>Known issues:
- schema validation does not appear to be supported
Serialization
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { user := User{123, "will"} bytes, _ := xml.Marshal(&user) fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> }You can also marshal with an indent
xml.MarshalIndent( &user, " ", // (2) indent entire object this many spaces " ", // (4) indent-width ) //| | <-- 2x spaces // <User name="will"> // <id>123</id> // <-- indented 4x spaces // </User>
Deserialization
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { raw := `<User name="will"><id>123</id></User>` var user User xml.Unmarshal([]byte(raw), &user) fmt.Println(user) }Non-Homogenous XML
XML is generally not homogenous.
Record each possible sub-element as a field on your object.
If an element can occur multiple times, declare it as an array.
You can ignore elements by not defining fields for them.encoded := ` <mediawiki> <siteinfo> abc </siteinfo> <page> <title>Main Page</title> </page> <page> <title>Linux</title> </page> </mediawiki> ` type Result struct { XMLName xml.Name `xml:"mediawiki"` // root node SiteInfo string `xml:"siteinfo"` // only one 'siteinfo' element under 'mediawiki' Page []Page `xml:"page"` // multiple 'page' elements under 'mediawiki' } type Page struct { Title string `xml:"title"` } var result Result xml.Unmarshal([]byte(encoded), &result) fmt.Println(result.Page[1].Title) // LinuxArbitrary XML
type Node struct { XMLName xml.Name Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> Nodes []*Node `xml:",any"` // child-nodes } // deserialize var parsed Node xml.Unmarshal([]byte(raw), &parsed) // re-serialize bytes, err := xml.Marshal(&parsed)Example: Deserialize, Modify, Re-serialize an arbitrary xml object.
WARNING:
Do not parse HTML with XML (elements like
<br>
are invalid XML).import ( "encoding/xml" "fmt" ) type Node struct { XMLName xml.Name Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> Nodes []*Node `xml:",any"` // child-nodes } func addDotHtmlToAHrefs(node *Node) { // Adds a '.html' suffix to each href in a '<a href="foo">foo</a>' if node.XMLName.Local == "a" { for _, attr := range node.Attrs { if attr.Name.Local == "href" { attr.Value = fmt.Sprint(attr.Value, ".html") } } } for _, child := range node.Nodes { addDotHtmlToAHrefs(child) } } func main() { // define xml raw := `<html> <p><a href="abc">ABC</a></p> <blockquote><p><a href="def">DEF</a></p></blockquote> </html>` // unmarshall var parsed Node xml.Unmarshal([]byte(raw), &parsed) // modify addDotHtmlToAHrefs(&parsed) // re-marshall, modified bytes, _ := xml.MarshalIndent(&parsed, "", " ") fmt.Println(string(bytes)) }Outputs
<html> <p> <a href="abc.html">ABC</a> </p> <blockquote> <p> <a href="def.html">DEF</a> </p> </blockquote> </html>
Custom Deserializers
TODO:
finish
func (revision *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return nil }