The rest of the filters

From ConTeXt wiki

< HTML_to_ConTeXt


# Transfer the syntactically altered HTML to a String object and rewrite
# that string in place. The substitutions below run strictly in order:
# later steps rely on the escaping done by earlier ones.
newdoc = @article.inner_html

# Strip leading whitespace from every line. "^" anchors at each line start
# and \s also matches newlines, so blank lines are removed as well.
newdoc.gsub!(/^\s+/, "")

# Remove all elements we don't need. For <div> and <form>, everything from a
# tag at the start of a line up to the end of that line is dropped.
newdoc.gsub!(/^<div.*/, "")
newdoc.gsub!(/^<\/div.*/, "")
newdoc.gsub!(/^<form.*/, "")
newdoc.gsub!(/^<\/form.*/, "")

# Paragraphs become newline-separated text.
newdoc.gsub!(/<p>/, "\n")
newdoc.gsub!(/<\/p>/, "\n")

# Underline tags are dropped. The opening pattern must be /<u>/: writing it
# as "\u" is an invalid Unicode escape in a Ruby regexp literal.
newdoc.gsub!(/<u>/, "")
newdoc.gsub!(/<\/u>/, "")

# Lists map onto ConTeXt itemize environments.
newdoc.gsub!(/<ul>/, "\\startitemize[1]")
newdoc.gsub!(/<\/ul>/, "\\stopitemize")
newdoc.gsub!(/<ol>/, "\\startitemize[n]")
newdoc.gsub!(/<\/ol>/, "\\stopitemize")
newdoc.gsub!(/<li>/, "\\item ")
newdoc.gsub!(/<\/li>/, "\n")

# Escape every underscore. This runs before any ConTeXt command containing
# an underscore could be inserted by the substitutions below.
newdoc.gsub!("_", "\\_")

# Tables map onto ConTeXt natural tables.
newdoc.gsub!(/<table>/, "\\bTABLE \n")
newdoc.gsub!(/<\/table>/, "\\eTABLE \n")
newdoc.gsub!(/<tr>/, "\\bTR ")
newdoc.gsub!(/<\/tr>/, "\\eTR ")
newdoc.gsub!(/<td>/, "\\bTD ")
newdoc.gsub!(/<\/td>/, "\\eTD ")
newdoc.gsub!(/<th>/, "\\bTH ")
newdoc.gsub!(/<\/th>/, "\\eTH ")

# Centering is dropped; emphasis becomes an {\em ...} group.
newdoc.gsub!(/<center>/, "")
newdoc.gsub!(/<\/center>/, "")
newdoc.gsub!(/<em>/, "{\\em ")
newdoc.gsub!(/<\/em>/, "}")

# Remove carets and escape the remaining TeX special characters.
newdoc.gsub!("^", "")
newdoc.gsub!("%", "\\%")
# Decode the HTML entity for "&" first, so that the escaping step below
# yields "\&" rather than "\&amp;". (Replacing "&" with "&" was a no-op.)
newdoc.gsub!("&amp;", "&")
# In a gsub replacement "\\" stands for one literal backslash, so the
# three-character string below inserts "\&".
newdoc.gsub!("&", '\\\&')
newdoc.gsub!("$", '\\$')

# Table bodies.
newdoc.gsub!(/<tbody>/, "\\bTABLEbody \n")
newdoc.gsub!(/<\/tbody>/, "\\eTABLEbody \n")

# ConTeXt does not mind "_" in figure references and does not recognize
# "\_" there, so the escaped underscores are turned back into plain ones
# in three passes. gsub! leaves the string untouched when a pattern does
# not match, so no explicit match check is needed before each pass.

# First catch: directory components and name parts ending in "\_" that
# directly follow the figure root.
filter = /\/AnnRep07\/Figures\/(\w+\/)*(\w+\\_)*/
newdoc.gsub!(filter) { |figure_path| figure_path.gsub("\\_", "_") }

# Second catch: a "\_" that follows a "-" or "." inside the file name.
filter2 = /\/AnnRep07\/Figures\/(\w+\/)*\w+[-.]\w+\\_\w+/
newdoc.gsub!(filter2) { |figure_path| figure_path.gsub("\\_", "_") }

# Third catch: remove "\_" inside [] and print each bracketed match that
# gets rewritten.
filter3 = /\[\w+\\_\w+\]/
newdoc.gsub!(filter3) do |bracketed|
  puts bracketed
  bracketed.gsub("\\_", "_")
end


# Strip the HTML comment delimiters, which were used to embed ConTeXt
# commands; the text between the delimiters is kept. The opening delimiter
# is removed first, then the closing one.
%w[<!-- -->].each { |delimiter| newdoc.gsub!(delimiter, "") }

# Add the full path to the images by prefixing the figure root with "~".
newdoc.gsub!("/AnnRep07/Figures/") { |figure_root| "~#{figure_root}" }

# Drop attribute-less self-closing tags such as <br />.
newdoc.gsub!(/<\w+\s*\/>/, "")

# Legacy debugging and file handling, kept for reference:
#puts newdoc
#outfil="#{oFile}.tex"
#`rm #{outfil}`
#fil=File.new(outfil,"a")
#puts "Writing #{oFile}"

# Write the converted document to oFile, which is provided from outside
# this section.
oFile.write(newdoc)

end