-- Module:Plain text/sandbox
-- Documentation for this module may be created at Module:Plain text/sandbox/doc
--Converts text with wikilinks to plain text, e.g. "[[foo|gah]] is [[bar]]" to "gah is bar".
--Removes anything enclosed in tags that isn't nested, MediaWiki strip markers (references etc.), files, and italic and bold markup.
-- Export table: the functions attached to it below are what the module's final `return p` hands back.
local p = {}
-- Template-facing entry point.
-- Takes the text to convert from the first positional argument of `frame` and
-- the `encode` argument (run through Module:yesno), then delegates the actual
-- conversion to p._main and returns whatever that returns.
function p.main(frame)
    local args = frame.args
    local yesno = require('Module:yesno')
    return p._main(args[1], yesno(args.encode))
end
-- Link prefixes (compared lower-cased) whose whole [[...]] link is deleted
-- instead of being reduced to its label.
local removed_link_prefixes = {
    image = true,
    file = true,
    media = true,
    category = true,
}

-- gsub callback for a paired tag: the contents of a <span> are kept, any other
-- tag is dropped together with its contents.
local function replace_paired_tag(tag, contents)
    if tag:lower() == 'span' then
        return contents
    end
    return ''
end

-- gsub callback for one balanced [...] run: if the run is exactly a
-- [[prefix:...]] link whose prefix is listed in removed_link_prefixes it is
-- replaced by the empty string, otherwise it is returned unchanged.
local function drop_unwanted_link(bracketed)
    -- Parenthesised so that only the rewritten string (not gsub's match count)
    -- is returned.
    return (bracketed:gsub('^%[%[%s*(%a+):.-%]%]$', function(link_prefix)
        if removed_link_prefixes[link_prefix:lower()] then
            return ''
        end
        -- Returning nothing makes gsub keep the original match untouched.
    end))
end

-- Converts wikitext to plain text.
--   text   : the wikitext to convert; when it is nil/false nothing is returned.
--   encode : when truthy, the result is passed through mw.text.encode before
--            being returned.
-- Returns the converted string (or nothing when `text` is missing).
function p._main(text, encode)
    if not text then return end
    text = mw.text.killMarkers(text) --drop MediaWiki strip markers
        :gsub('&nbsp;', ' ') --replace nbsp entities with regular spaces
        :gsub('\194\160', ' ') --same for the literal U+00A0 character (UTF-8 bytes C2 A0)
        :gsub('<br ?/?>', ', ') --replace br with commas
        --paired tags that carry attributes: keep span contents, drop the rest.
        --NOTE: [^>]+ needs at least one character after the tag name, so
        --attribute-less tags (e.g. <span>x</span>) are not handled here and
        --fall through to the generic tag stripping below.
        :gsub('<(%a+)[^>]+>(.-)</%1>', replace_paired_tag)
        :gsub('<i[^>]+>([^<]+)</i>', '%1') --remove italics while keeping text inside
        :gsub('<[^>]+>[^<]+<[^>]+>', '') --strip out remaining tags and the text inside
        :gsub('%b<>', '') --remove any other tag markup
        :gsub('__[^_]+__', '') --remove __ markups
        :gsub('^=+[^=]+=+', ''):gsub('\n=+[^=]+=+', '') --remove section titles
        :gsub('%b[]', drop_unwanted_link) --remove image/file/media/category links
        :gsub('%[%[[^%]|]+|', '') --strip out piped link text
        :gsub('([^%[])%[[^%[%]][^%]]-%s', '%1') --strip out external link text
        :gsub('^%[[^%[%]][^%]]-%s', '') --strip out external link text
        :gsub('[%[%]]', '') --then strip out remaining [ and ]
        :gsub("'''''", "") --strip out bold italic markup
        :gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes
        --remove ---- lines (four or more dashes). The dashes must be escaped:
        --an unescaped '-' is the lazy repetition operator, so a bare '----'
        --pattern only ever matches the empty string and removes nothing.
        :gsub('%-%-%-%-+', '')
        :gsub('^%s+', ''):gsub('\n%s+', '\n') --strip leading
        :gsub('%s+$', ''):gsub('%s+\n', '\n') --and trailing spaces
        :gsub('(%s)%s+', '%1') --strip redundant spaces
    if encode then
        return mw.text.encode(text)
    end
    return text
end
-- Hand the export table back to whatever loads this module.
return p