Golang encoding/xml: Difference between revisions

From wikinotes
Line 137: Line 137:
</syntaxhighlight>
</syntaxhighlight>
</blockquote><!-- Custom Deserializers -->
</blockquote><!-- Custom Deserializers -->
== Arbitrary XML ==
<blockquote>
<syntaxhighlight lang="go">
import (
"encoding/xml"
"fmt"
)
type Html struct {
xmlName  xml.Name `xml:"html"`
Children []Node  `xml:",any"`
}
// Node is a generic element that captures its whole subtree without knowing
// any tag names in advance.
type Node struct {
	// Children recursively catches all nested elements.
	Children []Node `xml:",any"`

	// Attrs holds the attribute values
	// (ex. 'pages/index' in '<a href="pages/index">index</a>').
	Attrs []string `xml:",any,attr"`

	// Content is the raw inner XML of the element
	// (ex. 'Title' in '<h1>Title</h1>').
	Content string `xml:",innerxml"`
}
func main() {
    // define raw html
raw := `
        <html>
          <p><a href="abc">ABC</a></p>
          <blockquote><p><a href="def">DEF</a></p></blockquote>
        </html>`
    // deserialize recursively
var parsed Html
xml.Unmarshal([]byte(raw), &parsed)
fmt.Println(parsed)
}
// {{ } [{[{[] [abc] ABC}] [] <a href="abc">ABC</a>} {[{[{[] [def] DEF}] [] <a href="def">DEF</a>}] [] <p><a href="def">DEF</a></p>}]}
</syntaxhighlight>
</blockquote><!-- Arbitrary XML -->
</blockquote><!-- Deserialization -->
</blockquote><!-- Deserialization -->

Revision as of 04:03, 10 July 2022

Go's built-in library for parsing XML.
See also: golang encoding, xml

A more detailed introduction to go's encoding interface can be seen in golang encoding/json.

NOTE:

Go's built-in XML library does not support the full XML spec.
Confirm that the features you need are supported before using it.

Documentation

official docs https://pkg.go.dev/encoding/xml@go1.18.3

Tutorials

tutorialedge https://tutorialedge.net/golang/parsing-xml-with-golang/

Struct Tags

See full details here

// User demonstrates the most common encoding/xml struct tags.
// Each field needs a type; without one Go parses the line as an embedded type.
type User struct {
	Name  string `xml:"Name"`       // <User><Name>value</Name></User>
	Color string `xml:"color,attr"` // <User color="value"></User>
	Skip  string `xml:"-"`          // <User></User>
}

XML namespaces are supported by prefixing the element name in the struct tag with the namespace URL, separated by a space.

// Html maps the root <html> element of an XHTML document.
type Html struct {
	// The tag has the form "namespace-URL name". The closing quote is
	// required: without it the tag cannot be parsed and the name/namespace
	// are silently ignored.
	XMLName xml.Name `xml:"http://www.w3.org/1999/xhtml html"`
}

// <html xmlns="http://www.w3.org/1999/xhtml">
//   <!-- ... -->
// </html>

Known issues:

  • schema validation does not appear to be supported

Serializing

Basics

// User is the sample record used by the serialization example.
type User struct {
	// Id is written as a child element: <id>123</id>.
	Id int `xml:"id"`

	// Name is written as an attribute on the <User> start tag.
	Name string `xml:"name,attr"`
}

// main serializes a User and prints the resulting XML.
func main() {
	user := User{123, "will"}

	// Check the Marshal error rather than discarding it. The result is named
	// encoded so it does not shadow the standard library's bytes package.
	encoded, err := xml.Marshal(&user)
	if err != nil {
		fmt.Println("marshal:", err)
		return
	}
	fmt.Println(string(encoded)) // <User name="will"><id>123</id></User>
}

Deserialization

Basics

// User is the sample record used by the deserialization example.
type User struct {
	// Id is read from the <id> child element.
	Id int `xml:"id"`

	// Name is read from the name="..." attribute of <User>.
	Name string `xml:"name,attr"`
}

// main deserializes an XML document into a User and prints the result.
// (The previous body was a copy of the serialization example and never
// called Unmarshal.)
func main() {
	raw := `<User name="will"><id>123</id></User>`

	// Unmarshal needs a pointer so it can fill in the struct.
	var user User
	if err := xml.Unmarshal([]byte(raw), &user); err != nil {
		fmt.Println("unmarshal:", err)
		return
	}
	fmt.Printf("%+v\n", user) // {Id:123 Name:will}
}

Non-Homogeneous XML

XML is generally not homogeneous.
Record each possible sub-element as a field on your struct.
If an element can occur multiple times, declare the field as a slice.
You can ignore elements by not defining fields for them.

// encoded is the sample document: a <mediawiki> root holding one <siteinfo>
// element and two <page> elements, each with a <title>.
encoded := `
  <mediawiki>
      <siteinfo>
          abc
      </siteinfo>
      <page>
          <title>Main Page</title>
      </page>
      <page>
          <title>Linux</title>
      </page>
  </mediawiki>
`

// Result is the decoding target for the <mediawiki> root element.
type Result struct {
	// XMLName pins the root node to <mediawiki>.
	XMLName xml.Name `xml:"mediawiki"`

	// SiteInfo: only one 'siteinfo' element occurs under 'mediawiki'.
	SiteInfo string `xml:"siteinfo"`

	// Page: multiple 'page' elements occur under 'mediawiki', hence a slice.
	Page []Page `xml:"page"`
}

// Page is a single <page> element; only its <title> child is recorded.
type Page struct {
	// Title is the text of the <title> child element.
	Title string `xml:"title"`
}

var result Result
xml.Unmarshall([]byte(encoded), &result)
fmt.Println(result.Page[0].Title.Text)  // Linux

Custom Deserializers

TODO:

finish

// UnmarshalXML implements xml.Unmarshaler for Revision.
//
// TODO: decode the element's contents into r. Until then the element is
// skipped. encoding/xml requires UnmarshalXML to consume exactly the element
// opened by start; returning nil without reading it makes the decoder report
// "did not consume entire element".
func (r *Revision) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	return d.Skip()
}

Arbitrary XML

import (
	"encoding/xml"
	"fmt"
)

// Html is the decoding target for the root element of the document.
//
// NOTE(review): xmlName is unexported, so encoding/xml skips the field and
// its `xml:"html"` tag (hence the empty leading "{ }" in the sample output).
// The conventional spelling is XMLName; confirm the intent before renaming.
type Html struct {
	xmlName xml.Name `xml:"html"`

	// Children collects every child element, whatever its tag name.
	Children []Node `xml:",any"`
}

// Node is a generic element that captures its whole subtree without knowing
// any tag names in advance.
type Node struct {
	// Children recursively catches all nested elements.
	Children []Node `xml:",any"`

	// Attrs holds the attribute values
	// (ex. 'pages/index' in '<a href="pages/index">index</a>').
	Attrs []string `xml:",any,attr"`

	// Content is the raw inner XML of the element
	// (ex. 'Title' in '<h1>Title</h1>').
	Content string `xml:",innerxml"`
}

// main decodes a small HTML fragment into the recursive Html/Node tree and
// prints the result.
func main() {
	// define raw html
	raw := `
        <html>
          <p><a href="abc">ABC</a></p>
          <blockquote><p><a href="def">DEF</a></p></blockquote>
        </html>`

	// deserialize recursively; malformed input is reported instead of
	// silently printing a half-filled struct
	var parsed Html
	if err := xml.Unmarshal([]byte(raw), &parsed); err != nil {
		fmt.Println("unmarshal:", err)
		return
	}
	fmt.Println(parsed)
}

// {{ } [{[{[] [abc] ABC}] [] <a href="abc">ABC</a>} {[{[{[] [def] DEF}] [] <a href="def">DEF</a>}] [] <p><a href="def">DEF</a></p>}]}