summary refs log tree commit diff
path: root/test/racc/assets/namae.y
diff options
context:
space:
mode:
Diffstat (limited to 'test/racc/assets/namae.y')
-rw-r--r-- test/racc/assets/namae.y 302
1 files changed, 0 insertions, 302 deletions
diff --git a/test/racc/assets/namae.y b/test/racc/assets/namae.y
deleted file mode 100644
index 0378345fef..0000000000
--- a/test/racc/assets/namae.y
+++ /dev/null
@@ -1,302 +0,0 @@
-# -*- ruby -*-
-# vi: set ft=ruby :
-
-# Copyright (C) 2012 President and Fellows of Harvard College
-# Copyright (C) 2013-2014 Sylvester Keil
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
-# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# The views and conclusions contained in the software and documentation are
-# those of the authors and should not be interpreted as representing official
-# policies, either expressed or implied, of the copyright holder.
-
-# Grammar and tokenizer for Namae::Parser, written as a racc grammar file.
-class Namae::Parser
-
-# Terminal symbols; these match the token types emitted by next_token below.
-token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
-
-# No shift/reduce conflicts are expected in this grammar.
-expect 0
-
-# Production rules follow.
-rule
-
- # A list of names: empty input yields an empty array, and each further
- # name following an AND token is appended to the list built so far.
- names : { result = [] }
- | name { result = [val[0]] }
- | names AND name { result = val[0] << val[2] }
-
- # A single name. A lone word is stored as the given name, whereas a lone
- # word that follows an honorific is stored as the family name. An honorific
- # in front of a display-order name is merged into that name.
- name : word { result = Name.new(:given => val[0]) }
- | display_order
- | honorific word { result = val[0].merge(:family => val[1]) }
- | honorific display_order { result = val[1].merge(val[0]) }
- | sort_order
-
- # An APPELLATION or TITLE token wrapped in a Name that carries only that
- # one field.
- honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
- | TITLE { result = Name.new(:title => val[0]) }
-
- # Display order: given name(s) come first and the family name last.
- # The alternatives add, in turn, an optional nickname (NICK), a particle
- # (von), and trailing suffixes and titles. The last alternative has no
- # given name at all, only a particle and a family name.
- display_order : u_words word opt_suffices opt_titles
- {
- result = Name.new(:given => val[0], :family => val[1],
- :suffix => val[2], :title => val[3])
- }
- | u_words NICK last opt_suffices opt_titles
- {
- result = Name.new(:given => val[0], :nick => val[1],
- :family => val[2], :suffix => val[3], :title => val[4])
- }
- | u_words NICK von last opt_suffices opt_titles
- {
- result = Name.new(:given => val[0], :nick => val[1],
- :particle => val[2], :family => val[3],
- :suffix => val[4], :title => val[5])
- }
- | u_words von last
- {
- result = Name.new(:given => val[0], :particle => val[1],
- :family => val[2])
- }
- | von last
- {
- result = Name.new(:particle => val[0], :family => val[1])
- }
-
- # Sort order: the family part comes first, then a COMMA, then `first`.
- # `first` evaluates to a two-element array [suffix, given], which is why
- # the actions index into it with [0] and [1]. In the third alternative the
- # leading u_words and the von part are joined to form the particle.
- # The second argument to Name.new is true exactly when a suffix is present.
- # NOTE(review): what Name does with that flag is not visible here - confirm
- # against the Name class.
- sort_order : last COMMA first
- {
- result = Name.new({ :family => val[0], :suffix => val[2][0],
- :given => val[2][1] }, !!val[2][0])
- }
- | von last COMMA first
- {
- result = Name.new({ :particle => val[0], :family => val[1],
- :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
- }
- | u_words von last COMMA first
- {
- result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
- :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
- }
- ;
-
- # A particle: starts with a lowercase word and may continue with further
- # lowercase words, optionally with uppercase words in between; the parts
- # are joined with single spaces.
- von : LWORD
- | von LWORD { result = val.join(' ') }
- | von u_words LWORD { result = val.join(' ') }
-
- # The family-name part: one lowercase word, or a run of u_words.
- last : LWORD | u_words
-
- # The part after the comma in sort order. Every alternative produces a
- # two-element array of the form [suffix, given]; a missing part is nil.
- first : opt_words { result = [nil,val[0]] }
- | words opt_comma suffices { result = [val[2],val[0]] }
- | suffices { result = [val[0],nil] }
- | suffices COMMA words { result = [val[0],val[2]] }
-
- # One or more u_word tokens joined with single spaces.
- u_words : u_word
- | u_words u_word { result = val.join(' ') }
-
- # A UWORD or PWORD token (LWORD is excluded).
- u_word : UWORD | PWORD
-
- # One or more words of any kind joined with single spaces.
- words : word
- | words word { result = val.join(' ') }
-
- # Optional variants of COMMA and words.
- opt_comma : /* empty */ | COMMA
- opt_words : /* empty */ | words
-
- # Any single word token.
- word : LWORD | UWORD | PWORD
-
- opt_suffices : /* empty */ | suffices
-
- # One or more SUFFIX tokens joined with single spaces.
- suffices : SUFFIX
- | suffices SUFFIX { result = val.join(' ') }
-
- opt_titles : /* empty */ | titles
-
- # One or more TITLE tokens joined with single spaces.
- titles : TITLE
- | titles TITLE { result = val.join(' ') }
-
----- header
-require 'singleton'
-require 'strscan'
-
----- inner
-
- include Singleton
-
- attr_reader :options, :input
-
- def initialize
- @input, @options = StringScanner.new(''), {
- :debug => false,
- :prefer_comma_as_separator => false,
- :comma => ',',
- :stops => ',;',
- :separator => /\s*(\band\b|\&|;)\s*/i,
- :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
- :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
- :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
- }
- end
-
- def debug?
- options[:debug] || ENV['DEBUG']
- end
-
- def separator
- options[:separator]
- end
-
- def comma
- options[:comma]
- end
-
- def stops
- options[:stops]
- end
-
- def title
- options[:title]
- end
-
- def suffix
- options[:suffix]
- end
-
- def appellation
- options[:appellation]
- end
-
- def prefer_comma_as_separator?
- options[:prefer_comma_as_separator]
- end
-
- def parse(input)
- parse!(input)
- rescue => e
- warn e.message if debug?
- []
- end
-
- def parse!(string)
- input.string = normalize(string)
- reset
- do_parse
- end
-
- def normalize(string)
- string = string.strip
- string
- end
-
- def reset
- @commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
- self
- end
-
- private
-
- def stack
- @vstack || @racc_vstack || []
- end
-
- def last_token
- stack[-1]
- end
-
- def consume_separator
- return next_token if seen_separator?
- @commas, @words, @initials, @suffices = 0, 0, 0, 0
- [:AND, :AND]
- end
-
- def consume_comma
- @commas += 1
- [:COMMA, :COMMA]
- end
-
- def consume_word(type, word)
- @words += 1
-
- case type
- when :UWORD
- @initials += 1 if word =~ /^[[:upper:]]+\b/
- when :SUFFIX
- @suffices += 1
- end
-
- [type, word]
- end
-
- def seen_separator?
- !stack.empty? && last_token == :AND
- end
-
- def suffix?
- !@suffices.zero? || will_see_suffix?
- end
-
- def will_see_suffix?
- input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
- end
-
- def will_see_initial?
- input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
- end
-
- def seen_full_name?
- prefer_comma_as_separator? && @words > 1 &&
- (@initials > 0 || !will_see_initial?) && !will_see_suffix?
- end
-
- # Scans the input at the current position and returns the next token as a
- # [type, value] pair, or nil when there is no scanner or the input is
- # exhausted. The branches are tried in order and the first pattern that
- # matches wins, so their order is significant.
- #
- # Raises ArgumentError when none of the patterns match.
- def next_token
- case
- when input.nil?, input.eos?
- nil
- when input.scan(separator)
- consume_separator
- when input.scan(/\s*#{comma}\s*/)
- # A comma stays a COMMA token if it is the first comma and no full name
- # has been seen yet, or if one comma was already consumed and a suffix
- # has been seen or is coming up. Otherwise it acts as a name separator.
- if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
- consume_comma
- else
- consume_separator
- end
- when input.scan(/\s+/)
- # Skip plain whitespace and try again.
- next_token
- when input.scan(title)
- consume_word(:TITLE, input.matched.strip)
- when input.scan(suffix)
- consume_word(:SUFFIX, input.matched.strip)
- when input.scan(appellation)
- # Unlike titles and suffixes, appellations bypass consume_word and are
- # therefore not counted as words.
- [:APPELLATION, input.matched.strip]
- # UWORD / LWORD: optional brace groups (each optionally preceded by a
- # backslash command) followed by an uppercase / lowercase letter, running
- # up to the next whitespace or stop character.
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
- consume_word(:UWORD, input.matched)
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
- consume_word(:LWORD, input.matched)
- # PWORD: a word that starts with a brace group but matched neither of the
- # two patterns above.
- when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
- consume_word(:PWORD, input.matched)
- # NICK: text in single or double quotes; the quotes are stripped.
- when input.scan(/('[^'\n]+')|("[^"\n]+")/)
- consume_word(:NICK, input.matched[1...-1])
- else
- raise ArgumentError,
- "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
- end
- end
-
- def on_error(tid, value, stack)
- raise ArgumentError,
- "Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
- end
-
-# -*- racc -*-