summaryrefslogtreecommitdiff
path: root/lib/rss/xmlscanner.rb
blob: 1cdf81c0c3bf63be709181a6dc694495fd6c0b1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
require 'xmlscan/scanner'
require 'stringio'

module RSS

  # RSS parser backend that tokenizes the document with the xmlscan library.
  class XMLScanParser < BaseParser

    # The listener class paired with this parser.
    def self.listener
      XMLScanListener
    end

    private

    # Runs @rss through an XMLScan::XMLScanner that reports to @listener.
    # A String source is wrapped in a StringIO first; any other source is
    # handed to the scanner as-is.
    #
    # Raises NotWellFormedError when the scanner raises XMLScan::Error. The
    # reported line number is the first non-nil value among the error's, the
    # scanner's and the input's +lineno+.
    def _parse
      source = @rss.is_a?(String) ? StringIO.new(@rss) : @rss
      xml_scanner = XMLScan::XMLScanner.new(@listener)
      xml_scanner.parse(source)
    rescue XMLScan::Error => error
      line = error.lineno || xml_scanner.lineno || source.lineno
      raise NotWellFormedError.new(line){error.message}
    end

  end

  # Translates xmlscan visitor callbacks (the on_* methods below) into calls
  # to xmldecl, instruction, text, tag_start and tag_end.
  # NOTE(review): those five methods are not defined in this class; they are
  # presumably supplied by BaseListener / ListenerMixin -- confirm.
  class XMLScanListener < BaseListener

    include XMLScan::Visitor
    include ListenerMixin

    # Replacement text for the entity names this listener can resolve.
    ENTITIES = {
      'lt' => '<',
      'gt' => '>',
      'amp' => '&',
      'quot' => '"',
      'apos' => '\''
    }

    # The three on_xmldecl_* callbacks only remember their value;
    # on_xmldecl_end forwards everything in one xmldecl call.
    def on_xmldecl_version(str)
      @version = str
    end

    def on_xmldecl_encoding(str)
      @encoding = str
    end

    def on_xmldecl_standalone(str)
      @standalone = str
    end

    # Reports the collected declaration; standalone is passed as a boolean
    # that is true only when the recorded value is exactly "yes".
    def on_xmldecl_end
      standalone = (@standalone == "yes")
      xmldecl(@version, @encoding, standalone)
    end

    # Processing instructions, character data and CDATA sections map
    # directly onto existing listener methods.
    alias on_pi instruction
    alias on_chardata text
    alias on_cdata text

    # End tag: forwarded unchanged.
    def on_etag(name)
      tag_end(name)
    end

    # Entity reference in content: emitted as text after resolution.
    def on_entityref(ref)
      resolved = entity(ref)
      text(resolved)
    end

    # Character reference in content: the code point is packed with the
    # 'U' (UTF-8) directive and emitted as text.
    def on_charref(code)
      char = [code].pack('U')
      text(char)
    end

    alias on_charref_hex on_charref

    # Start tag opened: begin with an empty attribute table. The tag name
    # is not used here; it arrives again in on_stag_end / on_stag_end_empty.
    def on_stag(name)
      @attrs = {}
    end

    # New attribute: its value starts empty and is built up in place by the
    # on_attr_* callbacks, so the same String object is stored in @attrs and
    # kept in @current_attr.
    def on_attribute(name)
      @current_attr = ''
      @attrs[name] = @current_attr
    end

    # Literal piece of the current attribute value.
    def on_attr_value(str)
      @current_attr << str
    end

    # Entity reference inside the current attribute value.
    def on_attr_entityref(ref)
      resolved = entity(ref)
      @current_attr << resolved
    end

    # Character reference inside the current attribute value.
    def on_attr_charref(code)
      char = [code].pack('U')
      @current_attr << char
    end

    alias on_attr_charref_hex on_attr_charref

    # Start tag closed: report it together with the collected attributes.
    def on_stag_end(name)
      tag_start(name, @attrs)
    end

    # Empty-element tag: reported as a start immediately followed by an end.
    def on_stag_end_empty(name)
      tag_start(name, @attrs)
      tag_end(name)
    end

    private

    # Looks up +ref+ in ENTITIES and returns its replacement text. For a
    # name that is not in the table, calls wellformed_error and returns
    # whatever that returns.
    def entity(ref)
      ENTITIES[ref] || wellformed_error("undefined entity: #{ref}")
    end
  end

end