< HTML_to_ConTeXt
# Now we transfer the syntactically altered HTML to a String object
# and manipulate that object further. All edits below mutate newdoc in place.
newdoc = @article.inner_html

# Ordered HTML -> ConTeXt rewrite rules. Every rule is applied to the output
# of the previous one, so the order is significant. String patterns are
# matched literally; in the Regexp patterns ^ anchors at the start of each line.
substitutions = [
  # Strip leading whitespace from every line (\s also matches newlines, so
  # blank lines are swallowed as well).
  [/^\s+/, ""],
  # Remove the elements we don't need: a div/form tag that starts a line is
  # dropped together with the rest of that line.
  [/^<div.*/, ""],
  [/^<\/div.*/, ""],
  [/^<form.*/, ""],
  [/^<\/form.*/, ""],
  # Paragraph tags become line breaks.
  [/<p>/, "\n"],
  [/<\/p>/, "\n"],
  # Underline tags are dropped. The opening pattern used to be /<\u>/, which
  # is an invalid Unicode escape in a Regexp literal and does not parse.
  [/<u>/, ""],
  [/<\/u>/, ""],
  # Lists.
  [/<ul>/, "\\startitemize[1]"],
  [/<\/ul>/, "\\stopitemize"],
  [/<ol>/, "\\startitemize[n]"],
  [/<\/ol>/, "\\stopitemize"],
  [/<li>/, "\\item "],
  [/<\/li>/, "\n"],
  # Escape underscores everywhere; the figure-path fix-ups further down undo
  # this where the escaped form must not appear.
  ["_", "\\_"],
  # Tables.
  [/<table>/, "\\bTABLE \n"],
  [/<\/table>/, "\\eTABLE \n"],
  [/<tr>/, "\\bTR "],
  [/<\/tr>/, "\\eTR "],
  [/<td>/, "\\bTD "],
  [/<\/td>/, "\\eTD "],
  [/<th>/, "\\bTH "],
  [/<\/th>/, "\\eTH "],
  # Centering is dropped, emphasis is kept.
  [/<center>/, ""],
  [/<\/center>/, ""],
  [/<em>/, "{\\em "],
  [/<\/em>/, "}"],
  # Remove literal carets and escape the remaining special characters.
  ["^", ""],
  ["%", "\\%"],
  # NOTE(review): this rule used to replace "&" with "&" (a no-op); it was
  # presumably meant to decode the &amp; entity before "&" is escaped on the
  # next line -- confirm against real input.
  ["&amp;", "&"],
  ["&", '\\\&'],   # '\\\&' makes gsub emit a literal backslash followed by "&"
  ["$", '\\$'],
  [/<tbody>/, "\\bTABLEbody \n"],
  [/<\/tbody>/, "\\eTABLEbody \n"]
]
substitutions.each { |pattern, replacement| newdoc.gsub!(pattern, replacement) }

# ConTeXt does not mind "_" in figure names and does not recognize \_ there,
# so we have to catch these places and turn \_ back into _.
# gsub! simply does nothing when a filter does not match, so no guard is needed.
# First catch: escaped underscores directly after the (sub)directory part.
filter = %r{/AnnRep07/Figures/(\w+/)*(\w+\\_)*}
newdoc.gsub!(filter) { |fString| fString.gsub("\\_", "_") }

# Second catch: an escaped underscore that follows a "-" or "." in the name.
filter2 = %r{/AnnRep07/Figures/(\w+/)*\w+[-.]\w+\\_\w+}
newdoc.gsub!(filter2) { |fString| fString.gsub("\\_", "_") }

# Third catch: remove \_ inside [].
filter3 = /\[\w+\\_\w+\]/
newdoc.gsub!(filter3) do |fString|
  puts fString # trace of every bracketed name that gets un-escaped
  fString.gsub("\\_", "_")
end

# Remove the comment tags, which we used to embed ConTeXt commands.
newdoc.gsub!("<!--", "")
newdoc.gsub!("-->", "")

# Prefix the image paths with "~".
newdoc.gsub!("/AnnRep07/Figures/", "~/AnnRep07/Figures/")

# Drop any remaining attribute-less self-closing tags such as <br />.
newdoc.gsub!(%r{<\w+\s*/>}, "")

# oFile is defined outside this section; it only needs to respond to #write.
oFile.write(newdoc)
end