# Token filter that normalizes token text to lower case.
#
# Wraps another token stream and lower-cases the text of each token that
# stream produces. Case folding is delegated to Unicode.downcase, which is
# provided by the `unicode` library (`require 'unicode'`), so that library
# must be loaded before #next is called.
class UnicodeLowerCaseFilter
  # token_stream - the upstream token source. It must respond to #next
  #                (returning a token object or nil) and to #text=.
  def initialize(token_stream)
    @input = token_stream
  end

  # Hands new source text to the wrapped stream unchanged.
  #
  # text - the text to pass to the upstream stream's #text= writer.
  def text=(text)
    @input.text = text
  end

  # Pulls the next token from the wrapped stream and lower-cases its text.
  #
  # The token is modified in place (its #text is reassigned) and the same
  # object is returned.
  #
  # Returns the token, or nil when the wrapped stream returns nil.
  def next
    token = @input.next
    return if token.nil?

    token.text = Unicode.downcase(token.text)
    token
  end
end