Golang encoding/xml: Difference between revisions
No edit summary |
|||
(39 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
go's builtin library for parsing xml. | go's builtin library for parsing xml.<br> | ||
See also: [[golang encoding]], [[xml]], [[golang x/net]] | |||
{{ | A more detailed introduction to go's encoding interface can be seen in [[golang encoding/json]]. | ||
{{ NOTE | | |||
Golang's builtin xml library does not support schema validation. }} | |||
= Documentation = | = Documentation = | ||
Line 8: | Line 11: | ||
{| class="wikitable" | {| class="wikitable" | ||
|- | |- | ||
| | | official docs || https://pkg.go.dev/encoding/xml@go1.18.3 | ||
|- | |- | ||
|} | |} | ||
Line 21: | Line 24: | ||
|} | |} | ||
</blockquote><!-- Tutorials --> | </blockquote><!-- Tutorials --> | ||
= Struct Tags = | = Struct Tags = | ||
Line 41: | Line 36: | ||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> | ||
XML namespaces are supported, by providing a prefix to the xml tag. | |||
<syntaxhighlight lang="go"> | |||
type Html struct { | |||
XMLName xml.Name `xml:"http://www.w3.org/1999/xhtml html"` | |||
} | |||
// <html xmlns="http://www.w3.org/1999/xhtml"> | |||
// <!-- ... --> | |||
// </html> | |||
</syntaxhighlight> | |||
Known issues: | |||
* schema validation does not appear to be supported | |||
</blockquote><!-- Struct Tags --> | </blockquote><!-- Struct Tags --> | ||
= Serialization = | |||
<blockquote> | |||
== Basics == | |||
<blockquote> | |||
<syntaxhighlight lang="go"> | |||
type User struct { | |||
Id int `xml:"id"` | |||
Name string `xml:"name,attr"` | |||
} | |||
func main() { | |||
user := User{123, "will"} | |||
bytes, _ := xml.Marshal(&user) | |||
fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> | |||
} | |||
</syntaxhighlight> | |||
You can also marshal with an indent | |||
<syntaxhighlight lang="go"> | |||
xml.MarshalIndent( | |||
&user, | |||
" ", // (2) indent entire object this many spaces | |||
" ", // (4) indent-width | |||
) | |||
//| | <-- 2x spaces | |||
// <User name="will"> | |||
// <id>123</id> // <-- indented 4x spaces | |||
// </User> | |||
</syntaxhighlight> | |||
</blockquote><!-- Basics --> | |||
</blockquote><!-- Serializing --> | |||
= Deserialization = | |||
<blockquote> | |||
== Basics == | |||
<blockquote> | |||
<syntaxhighlight lang="go"> | |||
type User struct { | |||
Id int `xml:"id"` | |||
Name string `xml:"name,attr"` | |||
} | |||
func main() { | |||
raw := `<User name="will"><id>123</id></User>` | |||
var user User | |||
xml.Unmarshal([]byte(raw), &user) | |||
fmt.Println(user) | |||
} | |||
</syntaxhighlight> | |||
</blockquote><!-- Basics --> | |||
== Non-Homogenous XML == | |||
<blockquote> | |||
XML is generally not homogenous.<br> | |||
Record each possible sub-element as a field on your object.<br> | |||
If an element can occur multiple times, declare it as a slice.<br> | |||
You can ignore elements by not defining fields for them. | |||
<syntaxhighlight lang="go"> | |||
encoded := ` | |||
<mediawiki> | |||
<siteinfo> | |||
abc | |||
</siteinfo> | |||
<page> | |||
<title>Main Page</title> | |||
</page> | |||
<page> | |||
<title>Linux</title> | |||
</page> | |||
</mediawiki> | |||
` | |||
type Result struct { | |||
XMLName xml.Name `xml:"mediawiki"` // root node | |||
SiteInfo string `xml:"siteinfo"` // only one 'siteinfo' element under 'mediawiki' | |||
Page []Page `xml:"page"` // multiple 'page' elements under 'mediawiki' | |||
} | |||
type Page struct { | |||
Title string `xml:"title"` | |||
} | |||
var result Result | |||
xml.Unmarshal([]byte(encoded), &result) | |||
fmt.Println(result.Page[1].Title) // Linux | |||
</syntaxhighlight> | |||
</blockquote><!-- Non-Homogenous XML --> | |||
== Arbitrary XML == | |||
<blockquote> | |||
<syntaxhighlight lang="go"> | |||
type Node struct { | |||
XMLName xml.Name | |||
Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | |||
Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> | |||
Nodes []*Node `xml:",any"` // child-nodes | |||
} | |||
// deserialize | |||
var parsed Node | |||
xml.Unmarshal([]byte(raw), &parsed) | |||
// re-serialize | |||
bytes, err := xml.Marshal(&parsed) | |||
</syntaxhighlight> | |||
{{ expand | |||
| Example: Deserialize, Modify, Re-serialize an arbitrary xml object. | |||
| | |||
{{ WARNING | | |||
Do not parse HTML with XML (unclosed elements like <code><nowiki><br></nowiki></code> are invalid XML). | |||
}} | |||
<syntaxhighlight lang="go"> | |||
import ( | |||
"encoding/xml" | |||
"fmt" | |||
) | |||
type Node struct { | |||
XMLName xml.Name | |||
Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | |||
Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> | |||
Nodes []*Node `xml:",any"` // child-nodes | |||
} | |||
func addDotHtmlToAHrefs(node *Node) { | |||
// Adds a '.html' suffix to each href in a '<a href="foo">foo</a>' | |||
if node.XMLName.Local == "a" { | |||
for _, attr := range node.Attrs { | |||
if attr.Name.Local == "href" { | |||
attr.Value = fmt.Sprint(attr.Value, ".html") | |||
} | |||
} | |||
} | |||
for _, child := range node.Nodes { | |||
addDotHtmlToAHrefs(child) | |||
} | |||
} | |||
func main() { | |||
// define xml | |||
raw := `<html> | |||
<p><a href="abc">ABC</a></p> | |||
<blockquote><p><a href="def">DEF</a></p></blockquote> | |||
</html>` | |||
// unmarshall | |||
var parsed Node | |||
xml.Unmarshal([]byte(raw), &parsed) | |||
// modify | |||
addDotHtmlToAHrefs(&parsed) | |||
// re-marshall, modified | |||
bytes, _ := xml.MarshalIndent(&parsed, "", " ") | |||
fmt.Println(string(bytes)) | |||
} | |||
</syntaxhighlight> | |||
Outputs | |||
<syntaxhighlight lang="html5"> | |||
<html> | |||
<p> | |||
<a href="abc.html">ABC</a> | |||
</p> | |||
<blockquote> | |||
<p> | |||
<a href="def.html">DEF</a> | |||
</p> | |||
</blockquote> | |||
</html> | |||
</syntaxhighlight> | |||
}} | |||
</blockquote><!-- Arbitrary XML --> | |||
== Custom Deserializers == | |||
<blockquote> | |||
{{ TODO | | |||
finish }} | |||
<syntaxhighlight lang="go"> | |||
func (revision *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |||
return nil | |||
} | |||
</syntaxhighlight> | |||
</blockquote><!-- Custom Deserializers --> | |||
</blockquote><!-- Deserialization --> |
Latest revision as of 06:07, 10 July 2022
go's builtin library for parsing xml.
See also: golang encoding, xml, golang x/net
A more detailed introduction to go's encoding interface can be seen in golang encoding/json.
NOTE:
Golang's builtin xml library does not support schema validation.
Documentation
official docs https://pkg.go.dev/encoding/xml@go1.18.3
Tutorials
tutorialedge https://tutorialedge.net/golang/parsing-xml-with-golang/
Struct Tags
See full details here
type User struct { Name `xml:"Name"` // <User><Name>value</Name></User> Color `xml:"color,attr"` // <User color="value"></User> Skip `xml:"-"` // <User></User> }XML namespaces are supported, by providing a prefix to the xml tag.
type Html struct { XMLName xml.Name `xml:"http://www.w3.org/1999/xhtml html"` } // <html xmlns="http://www.w3.org/1999/xhtml"> // <!-- ... --> // </html>Known issues:
- schema validation does not appear to be supported
Serialization
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { user := User{123, "will"} bytes, _ := xml.Marshal(&user) fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> }You can also marshal with an indent
xml.MarshalIndent( &user, " ", // (2) indent entire object this many spaces " ", // (4) indent-width ) //| | <-- 2x spaces // <User name="will"> // <id>123</id> // <-- indented 4x spaces // </User>
Deserialization
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { raw := `<User name="will"><id>123</id></User>` var user User xml.Unmarshal([]byte(raw), &user) fmt.Println(user) }Non-Homogenous XML
XML is generally not homogenous.
Record each possible sub-element as a field on your object.
If an element can occur multiple times, declare it as a slice.
You can ignore elements by not defining fields for them.encoded := ` <mediawiki> <siteinfo> abc </siteinfo> <page> <title>Main Page</title> </page> <page> <title>Linux</title> </page> </mediawiki> ` type Result struct { XMLName xml.Name `xml:"mediawiki"` // root node SiteInfo string `xml:"siteinfo"` // only one 'siteinfo' element under 'mediawiki' Page []Page `xml:"page"` // multiple 'page' elements under 'mediawiki' } type Page struct { Title string `xml:"title"` } var result Result xml.Unmarshal([]byte(encoded), &result) fmt.Println(result.Page[1].Title) // LinuxArbitrary XML
type Node struct { XMLName xml.Name Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> Nodes []*Node `xml:",any"` // child-nodes } // deserialize var parsed Node xml.Unmarshal([]byte(raw), &parsed) // re-serialize bytes, err := xml.Marshal(&parsed)Example: Deserialize, Modify, Re-serialize an arbitrary xml object.
WARNING:
Do not parse HTML with XML (unclosed elements like
<br>
are invalid XML).import ( "encoding/xml" "fmt" ) type Node struct { XMLName xml.Name Attrs []*xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> Nodes []*Node `xml:",any"` // child-nodes } func addDotHtmlToAHrefs(node *Node) { // Adds a '.html' suffix to each href in a '<a href="foo">foo</a>' if node.XMLName.Local == "a" { for _, attr := range node.Attrs { if attr.Name.Local == "href" { attr.Value = fmt.Sprint(attr.Value, ".html") } } } for _, child := range node.Nodes { addDotHtmlToAHrefs(child) } } func main() { // define xml raw := `<html> <p><a href="abc">ABC</a></p> <blockquote><p><a href="def">DEF</a></p></blockquote> </html>` // unmarshall var parsed Node xml.Unmarshal([]byte(raw), &parsed) // modify addDotHtmlToAHrefs(&parsed) // re-marshall, modified bytes, _ := xml.MarshalIndent(&parsed, "", " ") fmt.Println(string(bytes)) }Outputs
<html> <p> <a href="abc.html">ABC</a> </p> <blockquote> <p> <a href="def.html">DEF</a> </p> </blockquote> </html>
Custom Deserializers
TODO:
finish
func (revision *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return nil }