Golang encoding/xml: Difference between revisions
From wikinotes
Line 140: | Line 140: | ||
== Arbitrary XML == | == Arbitrary XML == | ||
<blockquote> | <blockquote> | ||
You can define a struct to serialize/deserialize arbitrary xml without losing data. | |||
<syntaxhighlight lang="go"> | <syntaxhighlight lang="go"> | ||
Line 148: | Line 147: | ||
"fmt" | "fmt" | ||
) | ) | ||
type Node struct { | type Node struct { | ||
XMLName xml.Name | |||
Attrs []xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value | |||
Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> | |||
Nodes []Node `xml:",any"` // child-nodes | |||
} | } | ||
func main() { | func main() { | ||
// define xml | |||
raw := ` | raw := `<html> | ||
<p><a href="abc">ABC</a></p> | <p><a href="abc">ABC</a></p> | ||
<blockquote><p><a href="def">DEF</a></p></blockquote> | <blockquote><p><a href="def">DEF</a></p></blockquote> | ||
</html>` | </html>` | ||
// unmarshall | |||
var parsed | var parsed Node | ||
xml.Unmarshal([]byte(raw), &parsed) | xml.Unmarshal([]byte(raw), &parsed) | ||
fmt.Println(parsed) | fmt.Println(parsed.Nodes[0].Nodes[0].Data) // 'ABC' | ||
// re-marshall | |||
bytes, _ := xml.Marshal(&parsed) | |||
fmt.Println(string(bytes)) | |||
} | } | ||
</syntaxhighlight> | </syntaxhighlight> | ||
</blockquote><!-- Arbitrary XML --> | </blockquote><!-- Arbitrary XML --> | ||
</blockquote><!-- Deserialization --> | </blockquote><!-- Deserialization --> |
Revision as of 04:31, 10 July 2022
go's builtin library for parsing xml.
See also: golang encoding, xml
A more detailed introduction to go's encoding interface can be seen in golang encoding/json.
NOTE:
Golang's builtin xml library does not support the full xml spec.
Confirm your needs are supported before using it
Documentation
official docs https://pkg.go.dev/encoding/xml@go1.18.3
Tutorials
tutorialedge https://tutorialedge.net/golang/parsing-xml-with-golang/
Struct Tags
See full details here
type User struct { Name string `xml:"Name"` // <User><Name>value</Name></User> Color string `xml:"color,attr"` // <User color="value"></User> Skip string `xml:"-"` // <User></User> }
XML namespaces are supported by prefixing the element name in the xml tag with the namespace URI, separated by a space.
type Html struct { XMLName xml.Name `xml:"http://www.w3.org/1999/xhtml html"` } // <html xmlns="http://www.w3.org/1999/xhtml"> // <!-- ... --> // </html>
Known issues:
- schema validation does not appear to be supported
Serializing
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { user := User{123, "will"} bytes, _ := xml.Marshal(&user) fmt.Println(string(bytes)) // <User name="will"><id>123</id></User> }
Deserialization
Basics
type User struct { Id int `xml:"id"` Name string `xml:"name,attr"` } func main() { raw := `<User name="will"><id>123</id></User>` var user User xml.Unmarshal([]byte(raw), &user) fmt.Println(user.Id, user.Name) // 123 will }
Non-Homogenous XML
XML is generally not homogenous.
Record each possible sub-element as a field on your object.
If an element can occur multiple times, declare it as an array.
You can ignore elements by not defining fields for them.
encoded := ` <mediawiki> <siteinfo> abc </siteinfo> <page> <title>Main Page</title> </page> <page> <title>Linux</title> </page> </mediawiki> ` type Result struct { XMLName xml.Name `xml:"mediawiki"` // root node SiteInfo string `xml:"siteinfo"` // only one 'siteinfo' element under 'mediawiki' Page []Page `xml:"page"` // multiple 'page' elements under 'mediawiki' } type Page struct { Title string `xml:"title"` } var result Result xml.Unmarshal([]byte(encoded), &result) fmt.Println(result.Page[1].Title) // Linux
Custom Deserializers
TODO:
finish
func (revision *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return nil }
Arbitrary XML
You can define a struct to serialize/deserialize arbitrary xml without losing data.
import ( "encoding/xml" "fmt" ) type Node struct { XMLName xml.Name Attrs []xml.Attr `xml:",any,attr"` // each attr.Name, attr.Value Data string `xml:",chardata"` // 'Title' in <h1>Title</h1> Nodes []Node `xml:",any"` // child-nodes } func main() { // define xml raw := `<html> <p><a href="abc">ABC</a></p> <blockquote><p><a href="def">DEF</a></p></blockquote> </html>` // unmarshall var parsed Node xml.Unmarshal([]byte(raw), &parsed) fmt.Println(parsed.Nodes[0].Nodes[0].Data) // 'ABC' // re-marshall bytes, _ := xml.Marshal(&parsed) fmt.Println(string(bytes)) }