# MathML library
# $Id: x-math.rb 114 2005-08-13 15:04:36Z hiraku $
#
# Copyright (C) 2005, KURODA Hiraku <hiraku@hinet.mydns.jp>
# You can redistribute it and/or modify it under GPL2.
#

module MathML
	# Common ancestor of every exception raised by this library.
	class Error < StandardError; end

	# Root class of all MathML nodes. Each hook returns an empty string here;
	# subclasses override the ones they need.
	class Base
		# Element (tag) name.
		def ename
			""
		end

		# Attribute text placed inside the start tag. Implementations write it
		# in the form " name='value'" -- note the leading space.
		def attribute
			""
		end

		# Serialized form of the node.
		def to_s
			""
		end
	end

	# Base class for elements that have no content; rendered as a
	# self-closing tag.
	class TagOnly < Base
		def to_s
			tag = ename
			attrs = attribute
			"<#{tag}#{attrs}/>"
		end
	end

	# <none> element: an empty placeholder.
	class None < TagOnly
		def ename
			"none"
		end
	end

	# <mspace> element. The value given to the constructor is written
	# verbatim into the width attribute.
	class Space < TagOnly
		def initialize(space)
			@space = space
		end

		def ename
			"mspace"
		end

		def attribute
			%Q[ width='#{@space}']
		end
	end

	# Base class for elements that have content (start tag, body, end tag).
	class WithContent < Base
		# Returns +text+ (converted with to_s) with every line shifted right by
		# one space.
		def indent(text)
			text.to_s.each_line.inject('') { |shifted, line| shifted << ' ' << line }
		end

		# Start tag of the element, including its attributes.
		def otag
			%Q[<#{ename}#{attribute}>]
		end

		# End tag of the element.
		def ctag
			%Q[</#{ename}>]
		end
	end

	# Base class for elements whose content is a list of child nodes
	# (<math>, <mrow>, ...).
	class BlockBase < WithContent
		def initialize
			@contents = []
		end

		# Appends +content+ unless it is nil/false. Returns self so that calls
		# can be chained.
		def <<( content )
			@contents << content if content
			self
		end

		# Most recently appended child, or nil when there is none.
		def last
			@contents.last
		end

		# Removes and returns the most recently appended child.
		def pop
			@contents.pop
		end

		# Children joined one per line and indented by one space.
		def contents
			indent(@contents.join("\n"))
		end

		# Serializes the element; an element without children is written as an
		# adjacent start/end tag pair on one line.
		def to_s
			# Array#nitems (count of non-nil items) no longer exists in current
			# Ruby; compact.empty? asks the same question.
			if @contents.compact.empty?
				%Q[#{otag}#{ctag}]
			else
				%Q[#{otag}\n#{contents}\n#{ctag}]
			end
		end
	end

	# Base class for elements whose content is a single node or a string
	# (<mi>, <mo>, ...).
	class InlineBase < WithContent
		# Replaces the content with +c+ and returns self.
		def <<(c)
			@content = c
			self
		end

		# Content converted to a string.
		def content
			@content.to_s
		end

		# Single-line content is written between the tags on one line; content
		# spanning several lines is indented and placed on lines of its own.
		def to_s
			body = content
			line_count = body.count("\n")
			# A final line without a trailing newline still counts as a line.
			line_count += 1 if body =~ /[^\n]\z/
			return "#{otag}#{body}#{ctag}" unless line_count > 1
			%Q[#{otag}\n#{indent(body)}\n#{ctag}]
		end
	end

	# Mixin that adds an optional mathvariant attribute to an element.
	module Variant
		NON, NORMAL, BOLD = -1, 0, 1

		attr_accessor :variant

		def initialize
			super
			@variant = NON
		end

		# Returns " mathvariant='normal'" or " mathvariant='bold'" for the
		# matching variant, and an empty string for anything else.
		def attribute
			name =
				if NORMAL == @variant
					'normal'
				elsif BOLD == @variant
					'bold'
				end
			name ? %Q[ mathvariant='#{name}'] : ''
		end
	end

	# Horizontal alignment codes used by table cells.
	module Align
		CENTER, LEFT, RIGHT = 0, 1, 2
	end

	# <math> element, the root of a converted formula.
	class Math < BlockBase
		# +displaystyle+ selects display='block' instead of display='inline'.
		def initialize(displaystyle=false)
			super()
			@ds = displaystyle
		end

		# Value written into the display attribute.
		def style
			@ds ? 'block' : 'inline'
		end

		def ename
			"math"
		end

		def attribute
			" xmlns='http://www.w3.org/1998/Math/MathML' display='#{style}'"
		end
	end

	# <mrow> element: a horizontal group of child nodes.
	class Row < BlockBase
		def ename
			"mrow"
		end
	end

	# <mfenced> element: content surrounded by an opening and a closing
	# delimiter.
	class Fenced < BlockBase
		attr_accessor :open, :close

		def ename
			"mfenced"
		end

		# Builds the open/close attributes. A missing delimiter or "." yields an
		# empty attribute value; an escaped brace is written without its
		# backslash.
		def attribute
			specs = [[@open, '\{', '{'], [@close, '\}', '}']]
			opener, closer = specs.map do |raw, escaped, plain|
				sym = raw || '.'
				sym = plain if sym == escaped
				sym == '.' ? '' : sym
			end
			%Q[ open='#{opener}'] + %Q[ close='#{closer}']
		end
	end

	# <mfrac> element. The numerator is stored separately; the node appended
	# with << is the denominator.
	class Frac < InlineBase
		attr_accessor :numerator

		def initialize
			super
			@numerator=nil
		end

		def ename
			'mfrac'
		end

		# Numerator and denominator, one per line. A missing numerator is
		# rendered as <none/>.
		#
		# The result is always a newly built string: appending to the value
		# returned by @numerator.to_s would modify the numerator itself when it
		# is a String, making it grow on every call.
		def content
			top = @numerator ? @numerator.to_s : None.new.to_s
			"#{top}\n#{@content}"
		end
	end

	# <mover> element: a base (set with <<) with +over+ placed above it.
	class Over < InlineBase
		attr_accessor :over

		def initialize
			super
			@over = nil
		end

		def ename
			"mover"
		end

		# Base and over-script, one per line.
		def content
			"#{@content.to_s}\n#{@over.to_s}"
		end
	end

	# <munder> element: a base (set with <<) with +under+ placed below it.
	class Under < InlineBase
		attr_accessor :under

		def initialize
			super
			@under = nil
		end

		def ename
			"munder"
		end

		# Base and under-script, one per line.
		def content
			"#{@content.to_s}\n#{@under.to_s}"
		end
	end

	# <msubsup> element. It becomes <msub> or <msup> when only one of the
	# scripts is set, and serializes as the bare base when neither is set.
	class SubSup < InlineBase
		attr_accessor :sub, :sup

		def initialize
			super
			@sub=nil
			@sup=nil
		end

		# Name fragments used to build the element name; UnderOver overrides them.
		def ename_sub
			'sub'
		end

		def ename_sup
			'sup'
		end

		# Base followed by the scripts that are set, one per line. An empty
		# base is rendered as <none/>.
		#
		# The parts are joined into a new string instead of being appended to
		# @content.to_s: when the base is a String that would modify the base
		# itself, so every further call would repeat the scripts.
		def content
			base = @content.to_s
			base = None.new.to_s if base==''
			parts = [base]
			parts << @sub.to_s if @sub
			parts << @sup.to_s if @sup
			parts.join("\n")
		end

		# Raises Error when neither script is set.
		def ename
			if @sub && @sup then
				"m#{ename_sub}#{ename_sup}"
			elsif @sub then
				"m#{ename_sub}"
			elsif @sup
				"m#{ename_sup}"
			else
				raise Error.new('No sub&sup.')
			end
		end

		def to_s
			if @sub || @sup then
				super
			else
				@content.to_s
			end
		end
	end

	# <munderover> element (or <munder>/<mover> when only one script is set);
	# reuses the SubSup logic with different name fragments.
	class UnderOver < SubSup
		def ename_sub
			"under"
		end

		def ename_sup
			"over"
		end
	end

	# <mn> element: a number, with an optional mathvariant.
	class Number < InlineBase
		include Variant

		def ename
			'mn'
		end
	end

	# <mi> element: an identifier, with an optional mathvariant.
	class Identifier < InlineBase
		include Variant

		def ename
			'mi'
		end
	end
	
	# <mo> element: an operator.
	class Operator < InlineBase
		def ename
			'mo'
		end
	end

	# <mtext> element: plain text inside a formula.
	class Text < InlineBase
		def ename
			'mtext'
		end
	end

	# <msqrt> element: a square root.
	class Sqrt < InlineBase
		def ename
			'msqrt'
		end
	end

	# <mtable> element: a table whose children are rows.
	class Table < BlockBase
		def ename
			"mtable"
		end
	end

	# <mtr> element: a table row whose children are cells.
	class Tr < BlockBase
		def ename
			"mtr"
		end
	end

	# <mtd> element: a table cell. The alignment is one of the Align
	# constants; CENTER is the default and produces no attribute.
	class Td < BlockBase
		include Align
		# attr_accessor replaces the deprecated `attr :align, true` form.
		attr_accessor :align

		def initialize
			@align = CENTER
			super
		end

		def ename
			'mtd'
		end

		# Inherited attribute text plus a columnalign attribute for LEFT and
		# RIGHT cells. Always returns a String (an unrecognized alignment adds
		# nothing) and never modifies the string obtained from super.
		def attribute
			inherited = super
			suffix =
				case @align
				when LEFT
					" columnalign='left'"
				when RIGHT
					" columnalign='right'"
				else
					''
				end
			inherited + suffix
		end
	end

	# From here on: classes and helpers for converting TeX to MathML
	# Converts the TeX source +tex+ and returns the MathML markup as a string.
	# +displaystyle+ is passed on to the convertor unchanged.
	def convert_from_tex(tex, displaystyle=false)
		convertor = TeX2MathML::Convertor.new(tex, displaystyle)
		convertor.to_s
	end
	module_function :convert_from_tex

	module TeX2MathML
		# Lexer patterns. Every pattern is anchored at the start of the
		# remaining input; all but REG_WHITESPACE skip leading whitespace.
		REG_WHITESPACE = /\A\s+/
		# One digit (capture 1).
		REG_NUMERICS = /\A\s*(\d)/
		# A whole number literal such as ".5", "12" or "12.5" (capture 1).
		REG_MULTI_NUMERICS = /\A\s*((\.\d+)|(\d+(\.\d+)?))/
		# One punctuation/operator character (capture 1).
		REG_OPERATORS = /\A\s*([,\.\+\-\*=\/\(\)\[\]<>"'|;:!])/
		# One ASCII letter (capture 1).
		REG_ALPHABETS = /\A\s*([a-zA-Z])/
		REG_BLOCK_OPEN = /\A\s*\{/
		REG_BLOCK_CLOSE = /\A\s*\}/
		# A backslash followed by a run of letters or by one of the listed
		# symbols; capture 1 is the command name without the backslash.
		REG_COMMANDS = /\A\s*\\([a-zA-Z]+|[\\%&:;\{\}\$\#])\s*/
		# A brace-delimited parameter; capture 1 is the text between the braces.
		REG_PARAM = /\A\s*\{\s*([^\}]+)\s*\}\s*/
		# A double backslash (used as the row separator inside arrays).
		REG_WBSLASH = /\A\s*\\\\\s*/
		# A delimiter as accepted by parse_left_right: one of . | [ ] ( ),
		# an escaped brace, or a backslash command.
		REG_BRACES = /\A\s*([.|\[\]\(\)]|\\[\{\}]|\\[a-zA-Z]+)\s*/
		# Command names that complement_brace accepts as delimiters.
		BRACE_COMMANDS = ["lfloor", "rfloor", "lceil", "rceil", "langle",
			"rangle", "{", "}"]

		# Commands emitted as an operator entity of the same name, wrapped in
		# UnderOver (display style) or SubSup (otherwise).
		UNDEROVERS = ['sum', 'prod', 'coprod', 'bigcap', 'bigcup', 'bigsqcup',
			'bigvee', 'bigwedge', 'bigodot', 'bigotimes', 'bigoplus', 'biguplus']
		# Commands emitted as an identifier entity of the same name with the
		# normal variant.
		IDENTIFIERS = ['aleph', 'imath', 'jmath', 'ell', 'wp', 'Re', 
			'flat', 'natural', 'sharp', 'spadesuit', 'Diamond', 'triangle',
			'clubsuit', 'diamondsuit', 'heartsuit', 'Im', 'mho', 'angle']
		# Like IDENTIFIERS, but the entity name differs from the command name
		# (command => entity name).
		REPLACE_IDENTIFIERS = { 'hbar'=>'hslash', 'Box'=>'square',
			'infty'=>'infin', 'emptyset'=>'empty'}
		# Backslash-escaped characters emitted literally as an operator.
		ESCAPED_OPERATORS = ['\\', '%', '{', '}', '$', '#']
		# Lower-case Greek letters: identifier entity, no variant set.
		GREEK_S = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'varepsilon',
			'zeta', 'eta', 'theta', 'vartheta', 'iota', 'kappa', 'lambda', 'mu',
			'nu', 'xi', 'pi', 'varpi', 'rho', 'varrho', 'sigma', 'varsigma',
			'tau', 'upsilon', 'phi', 'varphi', 'chi', 'psi', 'omega']
		# Upper-case Greek letters: identifier entity with the normal variant.
		GREEK_L = ['Gamma', 'Delta', 'Theta', 'Lambda', 'Xi', 'Pi', 'Sigma',
			'Upsilon', 'Phi', 'Psi', 'Omega']
		# Commands emitted as an operator entity whose name differs from the
		# command name (command => entity name).
		REPLACE_OPERATORS = {'&'=>'amp', 'dots'=>'mldr', 'ldots'=>'mldr',
			'cdots'=>'ctdot', 'surd'=>'radic', 'partial'=>'part',
			'exists'=>'exist', 'neg'=>'not', 'lhd'=>'vrtri', 'rhd'=>'vltri',
			'unlhd'=>'ltrie', 'unrhd'=>'rtrie', 'neq'=>'ne', 'cdot'=>'sdot',
			'amalg'=>'coprod', 'circ'=>'compfn', 'asymp'=>'CupCap',
			'preceq'=>'prcue', 'leadsto'=>'zigrarr', 'to'=>'rightarrow'}
		# Commands emitted as an operator entity of the same name.
		OPERATORS = ['int', 'oint', 'pm', 'prime', 'nabla', 'top', 'bot',
			'forall', 'vdash', 'dashv', 'mp', 'times', 'div', 'ast', 'star',
			'dagger', 'ddagger', 'cap', 'cup', 'uplus', 'sqcap', 'sqcup', 'vee',
			'wedge', 'setminus', 'wr', 'bullet', 'diamond', 'oslash', 'odot',
			'bigcirc', 'bigtriangleup', 'bigtriangledown', 'triangleleft', 
			'triangleright', 'oplus', 'ominus', 'otimes', 'le', 'leq', 'ge',
			'geq', 'sim', 'll', 'gg', 'doteq', 'simeq', 'subset', 'supset',
			'approx', 'subseteq', 'supseteq', 'cong', 'smile', 'sqsubset',
			'sqsupset', 'equiv', 'frown', 'sqsubseteq', 'sqsupseteq', 'propto',
			'bowtie', 'in', 'ni', 'prec', 'succ', 'succeq', 'models', 'perp',
			'parallel', 'mid', 'rightarrow', 'leftarrow', 'uparrow', 'downarrow',
			'longrightarrow', 'longleftarrow', 'Rightarrow', 'Leftarrow',
			'Uparrow', 'Downarrow', 'Longrightarrow', 'Longleftarrow',
			'leftrightarrow', 'longleftrightarrow', 'Leftrightarrow',
			'Longleftrightarrow', 'updownarrow', 'Updownarrow', 'mapsto',
			'longmapsto', 'hookrightarrow', 'hookleftarrow', 'rightharpoonup',
			'leftharpoonup', 'rightharpoondown', 'leftharpoondown',
			'rightleftharpoons', 'nearrow', 'searrow', 'swarrow', 'nwarrow']
		# Function names emitted as an identifier whose text is the command name.
		STRINGS = ['arccos', 'ker', 'arcsin', 'cos', 'cosh', 'csc', 'min', 'deg',
			'exp', 'sinh', 'gcd', 'lg', 'ln', 'Pr', 'arctan', 'cot', 'det', 'hom',
			'log', 'sec', 'tan', 'arg', 'coth', 'dim', 'max', 'sin', 'tanh']
		# Names emitted as an identifier wrapped in UnderOver (display style)
		# or SubSup (otherwise).
		DISP_STRINGS = ['sup', 'lim', 'inf']
		# Like DISP_STRINGS, with replacement text (command => text).
		REPLACE_DISP_STRINGS = {'limsup'=>'lim sup', 'liminf'=>'lim inf'}
		# Accent commands => name of the entity placed over the argument.
		OVERS = {'hat'=>'circ', 'breve'=>'smile', 'grave'=>'grave',
			'acute'=>'acute', 'dot'=>'sdot', 'ddot'=>'nldr', 'tilde'=>'tilde',
			'bar'=>'macr', 'vec'=>'rightarrow', 'check'=>'vee', 'widehat'=>'circ',
			'overline'=>'macr', 'widetilde'=>'tilde', 'overbrace'=>'OverBrace'}
		# Commands => name of the entity placed under the argument.
		UNDERS = {'underbrace'=>'UnderBrace', 'underline'=>'macr'}

		# Font codes tracked by the convertor for the block being parsed.
		module Fonts
			NORMAL, BOLD, BLACKBOLD, SCRIPT, FRAKTUR, ROMAN = 0, 1, 2, 3, 4, 5
		end

		# Raised by Convertor#to_s when a conversion fails. Carries the original
		# TeX source and the part that was still unparsed at that moment.
		class ConvertError < Error
			attr_accessor :tex, :remained_tex

			def initialize(tex, remained_tex, message)
				@tex, @remained_tex = tex, remained_tex
				super(message)
			end
		end

		class Convertor
			include TeX2MathML
			@@unsecure_entity = false
			@@entity_list = Hash.new

			# Registers every name in +list+ as an entity that \entity{...} may
			# emit. Returns +list+.
			def self.add_entity_list(list)
				list.each { |name| @@entity_list[name] = true }
			end

			# When set to a true value, \entity{...} accepts any name instead of
			# only the registered ones.
			def self.unsecure_entity=(flg)
				@@unsecure_entity = flg
			end

			# Stores the TeX source and the display-style flag, and starts with an
			# empty stack of open blocks.
			def initialize(tex, displaystyle=false)
				@tex, @ds = tex, displaystyle
				@math = []
			end

			# Opens +new_block+ on the block stack. Without an explicit +font+ the
			# font of the current top entry is inherited.
			def push( new_block, font=@math.last[1])
				@math.push([new_block, font])
			end

			# Removes the top stack entry and returns its block.
			def pop_block
				@math.pop.first
			end

			# Block of the top stack entry (the block currently being filled).
			def last_block
				@math.last.first
			end

			# Sets the font of the top stack entry and returns it. Called without
			# an argument it just returns the current font.
			def last_font(font=@math.last[1])
				top = @math.last
				top[1] = font
			end

			# Converts the TeX source given to the constructor and returns the
			# serialized <math> element.
			#
			# TeX comments (an unescaped "%" up to the end of the line) are removed
			# first. Parsing consumes a working copy of the source; the source is
			# restored afterwards, so calling the method again gives the same
			# result instead of an empty <math> element.
			#
			# Raises ConvertError (carrying the original source and the unparsed
			# remainder) when a MathML::Error occurs while parsing.
			def to_s
				original = @tex.clone
				# Hide "\\" and "\%" behind placeholder characters so the comment
				# pattern only sees unescaped percent signs, then put them back.
				@tex = @tex.gsub(/\\\\/){"\001"}.
					gsub(/\\%/){"\002"}.
					gsub(/%.*$/){''}.
					gsub(/\002/){'\\%'}.
					gsub(/\001/){'\\\\'}
				# Drop blocks left on the stack by an earlier, failed run.
				@math.clear
				push(Math.new(@ds), Fonts::NORMAL)
				until @tex =~ /\A\s*\z/
					last_block << parse(true)
				end
				pop_block.to_s
			rescue Error => e
				raise ConvertError.new(original, @tex, e.message)
			ensure
				# The parse methods eat @tex; restore the full source for later calls.
				@tex = original if original
			end

			# Parses one item at the head of the input and returns its node (nil
			# for commands that only switch the font). With +multi_num+ a whole
			# number literal is read, otherwise a single digit.
			# Raises Error at end of input or when nothing matches.
			def parse(multi_num=false)
				raise Error.new('Unexpected end of TeX.') if @tex.empty?
				numeric = multi_num ? REG_MULTI_NUMERICS : REG_NUMERICS
				# The order of the tests is significant: the first match wins.
				if @tex =~ numeric
					parse_num(multi_num)
				elsif @tex =~ REG_ALPHABETS
					parse_char
				elsif @tex =~ REG_OPERATORS
					parse_operator
				elsif @tex =~ REG_BLOCK_OPEN
					parse_block
				elsif @tex =~ /\A\s*_/
					parse_sub
				elsif @tex =~ /\A\s*\^/
					parse_sup
				elsif @tex =~ REG_COMMANDS
					parse_command
				else
					raise Error.new('Syntax error.')
				end
			end

			# Consumes a number (a whole literal when +multi+ is true, else one
			# digit) and returns it as a Number, bold when the current font is bold.
			def parse_num(multi)
				number = Number.new
				number.variant = last_font == Fonts::BOLD ? Variant::BOLD : Variant::NON
				pattern = multi ? REG_MULTI_NUMERICS : REG_NUMERICS
				number << @tex.slice!(pattern).slice(pattern, 1)
			end

			# Consumes one letter and returns it as an Identifier styled according
			# to the current font: roman and bold set the variant, while
			# blackboard, script and fraktur replace the letter by an entity.
			def parse_char
				letter = @tex.slice!(REG_ALPHABETS).slice(REG_ALPHABETS, 1)
				font = @math.last[1]

				variant =
					case font
					when Fonts::ROMAN then Variant::NORMAL
					when Fonts::BOLD then Variant::BOLD
					else Variant::NON
					end

				suffix =
					case font
					when Fonts::BLACKBOLD then 'opf'
					when Fonts::SCRIPT then 'scr'
					when Fonts::FRAKTUR then 'fr'
					end
				letter = %Q[&#{letter}#{suffix};] if suffix

				ident = Identifier.new
				ident.variant = variant
				ident << letter
			end

			# Consumes one operator character and returns it as an Operator;
			# <, > and " are replaced by their entity references.
			def parse_operator
				escapes = { '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }
				symbol = @tex.slice!(REG_OPERATORS).slice(REG_OPERATORS,1)
				Operator.new << escapes.fetch(symbol, symbol)
			end

			# Parses a brace-delimited group and returns it as a Row.
			# Raises Error when the input ends before the closing brace.
			def parse_block
				push(Row.new)
				@tex.slice!(REG_BLOCK_OPEN)
				while @tex !~ REG_BLOCK_CLOSE
					raise Error.new('Brace unclosed.') if @tex.empty?
					last_block << parse(true)
				end
				@tex.slice!(REG_BLOCK_CLOSE)
				pop_block
			end

			# Handles "_": attaches the next item as subscript to the previous
			# node of the current block, wrapping that node in a SubSup first if
			# needed. The SubSup is taken off the block and returned, so the
			# caller appends it again.
			def parse_sub
				@tex.slice!(/\A\s*\_/)

				unless last_block.last.is_a?(SubSup)
					base = last_block.pop
					wrapper = SubSup.new
					wrapper << base
					last_block << wrapper
				end

				raise Error.new('Double subscript.') unless last_block.last.sub == nil
				script = parse
				raise Error.new('Need any symbol here.') unless script

				last_block.last.sub = script
				last_block.pop
			end

			# Handles "^": attaches the next item as superscript to the previous
			# node of the current block, wrapping that node in a SubSup first if
			# needed. The SubSup is taken off the block and returned, so the
			# caller appends it again.
			def parse_sup
				@tex.slice!(/\A\s*\^/)

				unless last_block.last.is_a?(SubSup)
					base = last_block.pop
					wrapper = SubSup.new
					wrapper << base
					last_block << wrapper
				end

				raise Error.new('Double superscript.') unless last_block.last.sup == nil
				script = parse
				raise Error.new('Need any symbol here.') unless script

				last_block.last.sup = script
				last_block.pop
			end

			# Wraps the first line of +str+ as an entity reference ("&name;").
			def entity(str)
				str.sub(/^(.*)$/) { "&#{Regexp.last_match(1)};" }
			end

			# Consumes one backslash command at the head of the input and returns
			# the node built for it. \it, \rm and \bf only change the current font
			# and return nil. Raises Error for a command that is not known.
			#
			# The lookup tables are consulted in a fixed order; the first one that
			# knows the command decides how it is rendered.
			def parse_command
				com = @tex.slice!(REG_COMMANDS).slice(REG_COMMANDS, 1)

				# Limits are placed under/over in display style, beside otherwise.
				limits = @ds ? UnderOver : SubSup
				font_switches = { 'it' => Fonts::NORMAL, 'rm' => Fonts::ROMAN,
					'bf' => Fonts::BOLD }
				font_groups = { 'mathit' => Fonts::NORMAL, 'mathrm' => Fonts::ROMAN,
					'mathbf' => Fonts::BOLD, 'mathbb' => Fonts::BLACKBOLD,
					'mathscr' => Fonts::SCRIPT, 'mathfrak' => Fonts::FRAKTUR }
				spaces = { 'quad' => "1em", 'qquad' => "2em", ':' => "0.222em",
					';' => "0.278em" }

				if UNDEROVERS.include?(com)
					limits.new << (Operator.new << entity(com))
				elsif IDENTIFIERS.include?(com)
					ident = Identifier.new << entity(com)
					ident.variant = Variant::NORMAL
					ident
				elsif REPLACE_IDENTIFIERS.key?(com)
					ident = Identifier.new << entity(REPLACE_IDENTIFIERS[com])
					ident.variant = Variant::NORMAL
					ident
				elsif ESCAPED_OPERATORS.include?(com)
					Operator.new << com
				elsif GREEK_S.include?(com)
					Identifier.new << entity(com)
				elsif GREEK_L.include?(com)
					ident = Identifier.new << entity(com)
					ident.variant = Variant::NORMAL
					ident
				elsif REPLACE_OPERATORS.key?(com)
					Operator.new << entity(REPLACE_OPERATORS[com])
				elsif OPERATORS.include?(com)
					Operator.new << entity(com)
				elsif STRINGS.include?(com)
					Identifier.new << com
				elsif DISP_STRINGS.include?(com)
					limits.new << (Identifier.new << com)
				elsif REPLACE_DISP_STRINGS.key?(com)
					limits.new << (Identifier.new << REPLACE_DISP_STRINGS[com])
				elsif OVERS.key?(com)
					accent = Over.new
					accent.over = (Operator.new << entity(OVERS[com]))
					accent << parse
				elsif UNDERS.key?(com)
					accent = Under.new
					accent.under = (Operator.new << entity(UNDERS[com]))
					accent << parse
				elsif font_switches.key?(com)
					# Changes the font of the current block; produces no node.
					last_font(font_switches[com])
					nil
				elsif font_groups.key?(com)
					# The argument is parsed inside a Row that carries the new font.
					push(Row.new, font_groups[com])
					last_block << parse
					pop_block
				elsif spaces.key?(com)
					Space.new(spaces[com])
				else
					case com
					when 'entity'
						parse_user_entity
					when 'stackrel'
						# Both arguments end up as operators: the first goes on top.
						as_operator = lambda do |node|
							node.is_a?(Operator) ? node : (Operator.new << node)
						end
						stack = Over.new
						stack.over = as_operator.call(parse)
						stack << as_operator.call(parse)
					when 'backslash'
						Operator.new << '\\'
					when 'frac'
						fraction = Frac.new
						fraction.numerator = parse
						fraction << parse
					when 'sqrt'
						root = Sqrt.new
						root << parse
					when 'mbox'
						parse_mbox
					when 'left'
						parse_left_right("right")
					when 'bigg'
						parse_left_right("bigg")
					when 'begin'
						parse_environment
					else
						raise Error.new("Command '#{com}' is not defined.")
					end
				end
			end

			# Handles \entity{name}: returns an Operator holding "&name;".
			# Raises Error when the parameter is missing, or when the name is not
			# registered and unsecure entities are not enabled.
			def parse_user_entity
				raise Error.new("Need parameter.") if @tex !~ REG_PARAM
				name = @tex.slice!(REG_PARAM).slice(REG_PARAM, 1)
				allowed = @@unsecure_entity || @@entity_list[name]
				raise Error.new("Unregisted entity \"#{name}\"") unless allowed
				Operator.new << entity(name)
			end

			# Handles the argument of \mbox: either a single character or a
			# brace-delimited run of text. Returns a Text node.
			# Raises Error('Syntax error.') when neither form is present.
			#
			# The original bare `elsif` took the assignment on the following line
			# as its condition, so input matching neither form ended in a
			# NoMethodError on nil instead of reaching the Error branch.
			def parse_mbox
				single = /\A\s*([^\{\}])\s*/
				block = /\A\s*\{([^\}]*?)\}\s*/
				pattern = [single, block].find { |re| @tex =~ re }
				raise Error.new("Syntax error.") unless pattern
				s = @tex.slice!(pattern).slice(pattern, 1)
				# The text comes straight from the TeX source; escape the XML
				# metacharacters so it cannot inject markup into the output.
				escapes = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }
				Text.new << s.gsub(/[&<>]/) { |ch| escapes[ch] }
			end

			# Parses a delimited group such as "\left( ... \right)" and returns it
			# as a Fenced node. The opening command has already been consumed;
			# +right+ is the name of the closing command (without backslash).
			# Raises Error when a delimiter is missing or the group is not closed.
			def parse_left_right(right)
				f = Fenced.new
				raise Error.new('Need brace here.') unless @tex=~REG_BRACES
				f.open = complement_brace(@tex.slice!(REG_BRACES).slice(REG_BRACES, 1))

				# The lookahead keeps "\right" from matching the first letters of a
				# longer command such as "\rightarrow".
				closer = /\A\s*\\#{Regexp.escape(right)}(?![a-zA-Z])/
				push(Row.new)
				until @tex=~closer
					raise Error.new('Brace unclosed.') if @tex.size==0
					last_block << parse
				end
				@tex.slice!(closer)

				# Without this check a missing closing delimiter ended in a
				# NoMethodError on nil rather than a conversion error.
				raise Error.new('Need brace here.') unless @tex=~REG_BRACES
				f.close = complement_brace(@tex.slice!(REG_BRACES).slice(REG_BRACES, 1))
				f << pop_block
				return f
			end

			# Turns a matched delimiter into the text stored on the Fenced node:
			# a backslash command becomes an entity (it must be listed in
			# BRACE_COMMANDS, otherwise Error is raised), an escaped brace loses
			# its backslash, and anything else is returned unchanged.
			# Defined as a regular method; as a nested def it was re-defined on
			# every call of parse_left_right.
			def complement_brace(str)
				if str=~/\\[a-zA-Z]+/ then
					str = str.slice(/\\([a-zA-Z]+)/, 1)
					raise Error.new("Symbol'#{str}' is not brace.") unless BRACE_COMMANDS.include?(str)
					entity(str)
				elsif str=~/\\[\{\}]/ then
					str.slice(/\\([\{\}])/, 1)
				else
					str
				end
			end

			# Handles the parameter of \begin{...}. Only the "array" environment
			# is supported; any other name raises Error.
			def parse_environment
				raise Error.new('Need parameter here.') if @tex !~ REG_PARAM
				env = @tex.slice!(REG_PARAM).slice(REG_PARAM, 1)
				raise Error.new("Environment '#{env}' is not defined.") unless env == 'array'
				parse_array
			end

			# Matches the "\end{array}" that closes an array environment.
			END_ARRAY = /\A\s*\\end\s*\{\s*array\s*\}/

			# Parses the body of an array environment (after "\begin{array}") and
			# returns it as a Table. The first parameter is the column layout;
			# rows are separated by a double backslash.
			def parse_array
				raise Error.new('Need parameter here.') if @tex !~ REG_PARAM
				layout = @tex.slice!(REG_PARAM).slice(REG_PARAM, 1)
				raise Error.new('Need layout symbol(lrc) here') if layout !~ /[lrc]+/

				table = Table.new
				until @tex =~ END_ARRAY
					raise Error.new('Unexpected end of TeX') if @tex.empty?
					table << parse_table_row(layout)
					break if @tex =~ END_ARRAY
					# Anything but a row separator is an error at this point.
					raise Error.new('Need w-backslash here.') unless @tex =~ REG_WBSLASH
					@tex.slice!(REG_WBSLASH)
				end
				@tex.slice!(END_ARRAY)
				table
			end

			# Parses one table row and returns it as a Tr. One cell is read per
			# byte of +layout+; "r" and "l" set the cell alignment, any other
			# byte leaves the default. Raises Error when the row holds fewer or
			# more cells than the layout has bytes.
			def parse_table_row(layout)
				row = Tr.new
				finished = false
				alignments = { 'r' => Align::RIGHT, 'l' => Align::LEFT }
				layout.each_byte do |byte|
					raise Error.new('Unexpected end of item of table') if finished
					cell = parse_table_data
					alignment = alignments[byte.chr]
					cell.align = alignment if alignment
					row << cell
					if @tex=~END_ARRAY || @tex=~REG_WBSLASH
						finished = true
					else
						# Skip the "&" that separates this cell from the next one.
						@tex.slice!(/\A\s*&/)
					end
				end
				raise Error.new('Unexpected end of array.') unless finished
				row
			end

			# Parses one table cell and returns it as a Td. Items are read until
			# a cell separator ("&"), a row separator or the end of the array
			# comes next.
			def parse_table_data
				cell = Td.new
				push(cell)
				cell << parse(true) until (@tex =~ /\A\s*(&|\\\\)\s*/) || (@tex =~ END_ARRAY)
				pop_block
			end
		end
	end
end
