-- Module:UCS

--	┌─────────────────────────────────────────┐
--	│ Makes the table of UCS (Unicode) characters for a reference page    │
--	└─────────────────────────────────────────┘

--       G l o b a l   v a r i a b l e s .
-- Mutable state shared by the output helpers and the character list parser.

-- Lines of wikitext produced so far; joined with "\n" by p.table.
local outbuff = { '{| class="wikitable"' } -- a sequence of (output) strings
-- Index of the most recently written entry of outbuff (puts() increments it
-- before storing).
local outptr = 1  -- global pointer in outbuff
-- Code point that the next character cell will stand for.
local base_codepoint = 32
-- Block name appended to the "U+xxxx:" cell that opens a row.
local Block = " Basic Latin"
local row_start = 1 -- usually, pointer to the last " |-" in outbuff

--       U t i l i t y   f u n c t i o n s   s t a r t   h e r e .

-- Appends one line of wikitext to the output buffer.
--   s  the string to store
-- The shared write position `outptr` is advanced first, so `s` always lands
-- in the slot right after the previously written entry.
function puts( s )
--   mw.log("Output: "..s)
    local slot = outptr + 1
    outbuff[slot] = s
    outptr = slot
end

-- Terminates the table row that is currently being built.
--   NoC  number of cells a complete row is expected to hold
--   s    text for the red filler cell that pads a row that came out short
-- Nothing is emitted when no cell has been written since the last row
-- separator. Otherwise an optional filler cell and then " |-" are written,
-- and `row_start` is moved to that separator.
function close_row( NoC, s )
--   mw.log("close_row("..NoC..", "..s..")")
    if outptr <= row_start then
        return
    end

    local missing = row_start + NoC - outptr
    if missing > 0 then -- may not happen with correct input data
        -- a single missing cell needs no colspan attribute
        local span = (missing > 1) and ('colspan='..missing..' ') or ''
        puts( ' | '..span..' style="color:red" |'..s )
    end

    puts(" |-")
    row_start = outptr
end

-- Turns a code point into the text placed in a table cell.
--   c  code point (number)
-- Code points listed below are returned as a decimal character reference
-- ("&#NN;"); every other code point is returned as the character itself.
function mkchar( c )
    local needs_entity =
           c < 36                       -- C0, space, !, ", #
        or c == 38                      -- &
        or ( c >= 91 and c <= 93 )      -- [ \ ]
        or ( c >= 123 and c <= 125 )    -- { | }
        or c == 127                     -- DEL, and ( c < 160 ) (C1) pointless

    if not needs_entity then
        return mw.ustring.char( c )
    end
    return '&#'..c..';'
end

-- Value of the hexadecimal digit most recently recognised by is_hex().
local hh = 0

-- Classifies a byte as a hexadecimal digit.
--   c  byte value, or nil (what string.byte() yields past the end of a string)
-- Returns the digit's value (0..15) for "0"-"9", "A"-"F" and "a"-"f",
-- and -1 for anything else, including nil.
-- Side effect: on success the value is also stored in `hh`; on failure `hh`
-- keeps its previous value.
function is_hex ( c )
    if c == nil then
        -- past the end of the input: not a digit (comparing nil would raise)
        return -1
    end

    local digit = -1
    if c >= 48 and c <= 57 then         -- 0–9
        digit = c - 48
    elseif c >= 65 and c <= 70 then     -- A–F
        digit = c - 55
    elseif c >= 97 and c <= 102 then    -- a–f
        digit = c - 87
    end

    if digit >= 0 then
        hh = digit
    end
    return digit
end

-- Reads a run of hexadecimal digits from a string.
--   s  the string to read from
--   i  index of the first byte to examine
-- Returns two values: the number spelled by the digits (0 when there are
-- none) and the index of the first byte after them.
function get_hex ( s, i )
    local v = 0
    while true do
        local byte = string.byte( s, i )
        if byte == nil then
            -- the digits ran up to the end of the string
            break
        end
        -- use the value is_hex() returns instead of reading it back from `hh`
        local digit = is_hex( byte )
        if digit < 0 then
            break
        end
        v = 16*v + digit
        i = i + 1
    end
    return v, i
end
--       U t i l i t y   f u n c t i o n s   e n d   h e r e .

-- Export table of the module; it is returned at the end of the file.
local p = {}

--       T h e   a n n o t a t i o n s   p a r s e r   s t a r t s   h e r e .

-- Maps a code point to its annotation string; filled by p.process_arg3.
p.annot_map = { }

-- Builds the cell text for one code point.
--   c  code point (number)
-- Returns the text produced by mkchar(c).
function mk_item ( c )
    local glyph = mkchar(c)
    if ( p.annot_map[c] ) then
        -- NOTE(review): the markup that should wrap an annotated character was
        -- unreadable here (the expression did not parse). Until the intended
        -- decoration is confirmed, annotated characters are returned unchanged;
        -- put the prefix/suffix into the two empty strings below.
        return '' .. glyph .. ''
    end
    return glyph
end

-- Parses the annotations argument and records it in p.annot_map.
--   annots  string of whitespace-separated items of the form
--           <characters>#<annotation>
-- Every code point of <characters> is mapped to the "#<annotation>" part
-- (the leading "#" is kept). Items without "#" match nothing and are skipped.
function p.process_arg3 ( annots )
--   mw.log(" annots = "..annots)
    -- The pattern needs a whitespace character after each item; pad the input
    -- so that the last item is recognised even at the very end of the string.
    for t, a in mw.ustring.gmatch( annots .. " ", "(%S+)(#.-)%s" ) do
--       mw.log(t.." → "..a)
        for cpt in mw.ustring.gcodepoint( t ) do
            p.annot_map[cpt] = a
        end
    end
end
--       T h e   a n n o t a t i o n s   p a r s e r   e n d s   h e r e .

--       T h e   c h a r a c t e r   l i s t   p a r s e r   s t a r t s   h e r e .

-- Attribute string used for every class that has no colour of its own.
local bubu = 'style="color:#9900FF" '

-- Cell attributes per character class. The parser indexes this table with
-- (byte % 32) of a class letter, so "A"/"a" is 1 ... "Z"/"z" is 26, and 0
-- and 27..31 belong to the non-letter bytes in the 65..122 range.
local bgg = {}
for class = 0, 31 do
    bgg[class] = bubu
end
bgg[4]  = 'bgcolor=#999999 '    -- D
bgg[9]  = 'bgcolor=#6600FF '    -- I: IPA
bgg[10] = 'style="background-color:#000000; color:#FFFF66" ' -- J: combining diacritics
-- Latin letters (K, L, M)
bgg[11] = 'bgcolor=#3333FF '    -- ASCII
bgg[12] = 'bgcolor=#3377FF '    -- less common
bgg[13] = 'bgcolor=#0099FF '    -- exotic
-- Numbers (N)
bgg[14] = 'bgcolor=#FF9999 '
-- Control characters (O)
bgg[15] = 'bgcolor=#FFAA66 '
-- Punctuation (P, Q)
bgg[16] = 'bgcolor=#33FF33 '    -- common (English)
bgg[17] = 'bgcolor=#22AA22 '    -- less common
-- Symbols (S, T, U)
bgg[19] = 'bgcolor=#FFFF66 '    -- common
bgg[20] = 'bgcolor=#CCFF66 '    -- box drawing / pseudographics
bgg[21] = 'bgcolor=#AAAA44 '    -- uncommon
bgg[24] = ''                    -- X: no attributes at all

-- Parses the character list and writes the body rows of the table.
--   charlist  string that is scanned byte by byte; the bytes recognised are:
--     "+"           followed by hex digits: sets the current code point
--     "!"           the rest of the line becomes the new block name
--     "#" ";" "/"   the rest of the line is skipped (after "#" the line feed
--                   itself is still processed, after ";" and "/" it is not)
--     line feed     ends the current table row
--     bytes 65..122 one character cell, styled through bgg[byte % 32];
--                   bytes from 96 up additionally get a reduced font size
--     "-"           one filler cell
--   All other bytes are ignored.
-- Returns 0 (and writes nothing) when charlist is at most one byte long,
-- otherwise 1.
-- Reads and updates the shared state: outbuff, outptr, row_start,
-- base_codepoint and Block.
function p.process_arg2 ( charlist )
    local c_length = string.len ( charlist )
    if ( c_length <= 1 ) then return 0 end
    local c_index = 1
    while ( c_index <= c_length ) do
        local c_code = string.byte( charlist, c_index )
        if ( c_code == 43 ) then -- “+”
            -- get_hex also returns the index of the first byte after the digits
            base_codepoint, c_index = get_hex (charlist, c_index+1 )
            if (
                ( outptr == row_start + 1 )
            and string.match( outbuff[outptr], '^ | style=')
            ) then
                -- the row holds a single cell that starts with " | style=":
                -- stretch that cell over all 33 columns and end the row
                outbuff[outptr] = ' | colspan=33 ' .. string.sub( outbuff[outptr], 3)
                puts(" |-")
                row_start = outptr
            else
                close_row( 33, "Unfinished row")
            end
        elseif ( c_code == 33 ) then -- “!”
            close_row( 33, "Unexpected “!” command")
            local eol = string.find( charlist, "\n", c_index+1, true )
            -- a “!” command without a terminating line feed ends the parsing
            if (eol == nil) then break end
            Block = string.sub( charlist, c_index+1, eol-1 )
            puts(
                ' | style="font-size:80%" |U+' ..
                string.format('%04x:',base_codepoint) .. Block
            )
            -- offset of the current code point within its row of 32
            local o = base_codepoint % 32
            if ( o > 0 ) then
                -- one cell spanning the `o` columns before the current code point
                puts( ' | colspan='..o..' |' )
                row_start = row_start - o + 1 -- temporary kludge
            end
            c_index = eol + 1
        elseif ( ( c_code == 35 ) or ( c_code == 59 ) or ( c_code == 47 ) ) then
            -- “#” (35), “;” (59) or “/” (47): skip to the end of the line
            local eol = string.find( charlist, "\n", c_index+1, true )
            if (eol == nil) then break end
            -- after “#” continue at the line feed itself, otherwise after it
            if ( c_code == 35 ) then c_index = eol else c_index = eol + 1 end
        elseif ( c_code == 10 ) then -- line feed
            if (
                ( outptr == row_start + 2 )  -- only one item in the row
            and ( string.byte( charlist, c_index - 1 ) == 45 ) -- it is “-”
            and string.match( outbuff[row_start+1], '^ | style=')
            ) then
                -- stretch the row's first cell over all 33 columns and replace
                -- the lone “-” cell with the row separator
                outbuff[row_start+1] = ' | colspan=33 bgcolor=#FF6699 ' .. string.sub( outbuff[row_start+1], 3)
                outbuff[outptr] = " |-"
                row_start = outptr
            else
                close_row( 33, "(skipped)") -- temporary
            end
            -- round the code point up to a multiple of 16
            -- (2097152 = 2^21 is itself a multiple of 16)
            base_codepoint = base_codepoint + ( (2097152 - base_codepoint) % 16 )
            c_index = c_index + 1
        else
            if ( outptr <= row_start ) then
                -- nothing written in this row yet: open it with the
                -- "U+xxxx:" + block name cell
                puts(
                    ' | style="font-size:75%" |U+' ..
                    string.format('%04x:',base_codepoint) .. Block
                )
            end
            if ( (c_code >= 65 ) and (c_code <= 122) ) then
                local dimin = ''
                if (c_code >= 96 ) then dimin = 'style="font-size:75%" ' end
                local item = mk_item(base_codepoint)
                -- class 10 (“J”/“j”) gets a dotted circle in front of the character
                if ( c_code%32 == 10 ) then item = '◌'..item end
                puts(' | '..bgg[c_code%32]..dimin..'|\t'..item)
                base_codepoint = base_codepoint + 1 --temporary
            elseif ( c_code == 45 ) then -- “-”
                puts(' | bgcolor=#AA4466 | ')
                base_codepoint = base_codepoint + 1 --temporary
            end -- ignore all other bytes
            c_index = c_index + 1
        end
    end
    close_row( 33, "end of data")
    return 1
end
--       T h e   c h a r a c t e r   l i s t   p a r s e r   e n d s   h e r e .

--       T h e   m a i n   r o u t i n e   s t a r t s   h e r e .

-- Builds the complete wikitable and returns it as one string of wikitext.
--   frame  frame object; only frame.args is read:
--     frame.args[2]  character list handed to p.process_arg2; when absent, the
--                    built-in list (Basic Latin and Latin-1 Supplement) is used
--     frame.args[3]  optional annotations handed to p.process_arg3
-- Returns the table source, lines joined with "\n".
function p.table( frame )
    -- frame.args[1] is ignored now, but planned to affect the table format

    -- Start from a clean output state, so that calling this function more than
    -- once does not append a second table to the first one's buffer.
    outbuff = { '{| class="wikitable"' }
    outptr = 1
    row_start = 1
    base_codepoint = 32
    Block = " Basic Latin"

    -- header row: one label cell plus 32 column headers
    puts(" |Block(s)")
    for k = 0, 9 do
        puts(" ! "..k)
    end
    for k = 10, 31 do
        -- from 10 on the header shows the column number in decimal and in hex
        puts(' ! style="font-size:75%; line-height:1.25" |'..string.format("%d %02x", k, k))
    end
    close_row( 33, "???")

    if ( frame.args[3] ) then
        p.process_arg3 ( frame.args[3] )
    end

    if ( frame.args[2] ) then
        p.process_arg2 ( frame.args[2] )
    else
        -- Built-in list. Its line feeds matter: the parser ends a row at each
        -- one and needs one to terminate the “!” command.
        p.process_arg2 ( [=[
PPPSSSSPPPSSPPPPNNNNNNNNNNPPSSSP
SKKKKKKKKKKKKKKKKKKKKKKKKKKPPPSS
DKKKKKKKKKKKKKKKKKKKKKKKKKKPPPS-
+00A0! Latin-1 Supplement
PQSSSSUPDSDQSOSDSSDDDSPPDDDQdddQ
LLLLLLlLLLLLLLLLLLLLLLLSLLLLLLLL
LLLLLLlLLLLLLLLLILLLLLLULLLLLLLL
]=] )
    end

    -- the buffer ends with a row separator; turn it into the end of the table
    outbuff[outptr] = " |}"
    return table.concat( outbuff, "\n" )
end
--       T h e   m a i n   r o u t i n e   e n d s   h e r e .

-- Hand the export table to the caller that loads this module.
return p