diff options
Diffstat (limited to 'test/racc/assets/namae.y')
-rw-r--r-- | test/racc/assets/namae.y | 302 |
1 files changed, 0 insertions, 302 deletions
diff --git a/test/racc/assets/namae.y b/test/racc/assets/namae.y deleted file mode 100644 index 0378345fef..0000000000 --- a/test/racc/assets/namae.y +++ /dev/null @@ -1,302 +0,0 @@ -# -*- ruby -*- -# vi: set ft=ruby : - -# Copyright (C) 2012 President and Fellows of Harvard College -# Copyright (C) 2013-2014 Sylvester Keil -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# The views and conclusions contained in the software and documentation are -# those of the authors and should not be interpreted as representing official -# policies, either expressed or implied, of the copyright holder. - -class Namae::Parser - -token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX - -expect 0 - -rule - - names : { result = [] } - | name { result = [val[0]] } - | names AND name { result = val[0] << val[2] } - - name : word { result = Name.new(:given => val[0]) } - | display_order - | honorific word { result = val[0].merge(:family => val[1]) } - | honorific display_order { result = val[1].merge(val[0]) } - | sort_order - - honorific : APPELLATION { result = Name.new(:appellation => val[0]) } - | TITLE { result = Name.new(:title => val[0]) } - - display_order : u_words word opt_suffices opt_titles - { - result = Name.new(:given => val[0], :family => val[1], - :suffix => val[2], :title => val[3]) - } - | u_words NICK last opt_suffices opt_titles - { - result = Name.new(:given => val[0], :nick => val[1], - :family => val[2], :suffix => val[3], :title => val[4]) - } - | u_words NICK von last opt_suffices opt_titles - { - result = Name.new(:given => val[0], :nick => val[1], - :particle => val[2], :family => val[3], - :suffix => val[4], :title => val[5]) - } - | u_words von last - { - result = Name.new(:given => val[0], :particle => val[1], - :family => val[2]) - } - | von last - { - result = Name.new(:particle => val[0], :family => val[1]) - } - - sort_order : last COMMA first - { - result = Name.new({ :family => val[0], :suffix => val[2][0], - :given => val[2][1] }, !!val[2][0]) - } - | von last COMMA first - { - result = Name.new({ :particle => val[0], :family => val[1], - :suffix => val[3][0], :given => val[3][1] }, !!val[3][0]) - } - | u_words von last COMMA first - { - result = Name.new({ :particle => val[0,2].join(' '), :family => val[2], - :suffix => val[4][0], :given => val[4][1] }, !!val[4][0]) - } - ; - - von : LWORD - | von LWORD { result = val.join(' ') } - | von u_words LWORD { result = val.join(' ') } - - last : LWORD | u_words - - first : opt_words { result = [nil,val[0]] } - | words opt_comma suffices { result = [val[2],val[0]] } - | suffices { result = [val[0],nil] } - | suffices COMMA words { result = [val[0],val[2]] } - - u_words : u_word - | u_words u_word { result = val.join(' ') } - - u_word : UWORD | PWORD - - words : word - | words word { result = val.join(' ') } - - opt_comma : /* empty */ | COMMA - opt_words : /* empty */ | words - - word : LWORD | UWORD | PWORD - - opt_suffices : /* empty */ | suffices - - suffices : SUFFIX - | suffices SUFFIX { result = val.join(' ') } - - opt_titles : /* empty */ | titles - - titles : TITLE - | titles TITLE { result = val.join(' ') } - ----- header -require 'singleton' -require 'strscan' - ----- inner - - include Singleton - - attr_reader :options, :input - - def initialize - @input, @options = StringScanner.new(''), { - :debug => false, - :prefer_comma_as_separator => false, - :comma => ',', - :stops => ',;', - :separator => /\s*(\band\b|\&|;)\s*/i, - :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d)\.?)(\s+|$)/i, - :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/, - :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i - } - end - - def debug? - options[:debug] || ENV['DEBUG'] - end - - def separator - options[:separator] - end - - def comma - options[:comma] - end - - def stops - options[:stops] - end - - def title - options[:title] - end - - def suffix - options[:suffix] - end - - def appellation - options[:appellation] - end - - def prefer_comma_as_separator? - options[:prefer_comma_as_separator] - end - - def parse(input) - parse!(input) - rescue => e - warn e.message if debug? - [] - end - - def parse!(string) - input.string = normalize(string) - reset - do_parse - end - - def normalize(string) - string = string.strip - string - end - - def reset - @commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug? - self - end - - private - - def stack - @vstack || @racc_vstack || [] - end - - def last_token - stack[-1] - end - - def consume_separator - return next_token if seen_separator? - @commas, @words, @initials, @suffices = 0, 0, 0, 0 - [:AND, :AND] - end - - def consume_comma - @commas += 1 - [:COMMA, :COMMA] - end - - def consume_word(type, word) - @words += 1 - - case type - when :UWORD - @initials += 1 if word =~ /^[[:upper:]]+\b/ - when :SUFFIX - @suffices += 1 - end - - [type, word] - end - - def seen_separator? - !stack.empty? && last_token == :AND - end - - def suffix? - !@suffices.zero? || will_see_suffix? - end - - def will_see_suffix? - input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix - end - - def will_see_initial? - input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/ - end - - def seen_full_name? - prefer_comma_as_separator? && @words > 1 && - (@initials > 0 || !will_see_initial?) && !will_see_suffix? - end - - def next_token - case - when input.nil?, input.eos? - nil - when input.scan(separator) - consume_separator - when input.scan(/\s*#{comma}\s*/) - if @commas.zero? && !seen_full_name? || @commas == 1 && suffix? - consume_comma - else - consume_separator - end - when input.scan(/\s+/) - next_token - when input.scan(title) - consume_word(:TITLE, input.matched.strip) - when input.scan(suffix) - consume_word(:SUFFIX, input.matched.strip) - when input.scan(appellation) - [:APPELLATION, input.matched.strip] - when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/) - consume_word(:UWORD, input.matched) - when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/) - consume_word(:LWORD, input.matched) - when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/) - consume_word(:PWORD, input.matched) - when input.scan(/('[^'\n]+')|("[^"\n]+")/) - consume_word(:NICK, input.matched[1...-1]) - else - raise ArgumentError, - "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}" - end - end - - def on_error(tid, value, stack) - raise ArgumentError, - "Failed to parse name: unexpected '#{value}' at #{stack.inspect}" - end - -# -*- racc -*- |