#!/usr/bin/perl
# Purpose: convert latex to html
# URL: http://materia.fisica.unimi.it/manini/scripts/convert2html

$version="4.37";
$date="12 Jun 2025";

# print_help: write the usage/about text to STDERR.
# Takes no arguments and returns nothing. The text interpolates $0 (script
# name), $version and $date.
# NOTE(review): the "Usage:" line was rebuilt from the options accepted by
# _parse_command_line below; in the sentence "avoiding deprecated to render
# Greeks & symbols" a word (probably an HTML tag name) appears to be missing.
# Confirm both against a pristine copy of the script.
sub print_help {
    print STDERR <<EOF;
Usage: $0 [-nf] [-h] [inputfile.tex ...] [> outputfile.html]
	do clean LaTex into html conversion
	-nf	no figures conversion
	-h	print this help

Other uses: LaTex into MS-Word conversion (reads html)

This script generates fairly clean and smooth html files (without the
messy images of latex2html, avoiding deprecated to render Greeks & symbols).
Decent conversion guesses for tabulars + (e)psfig-ures ... looks for
png/gif/jpg/bmp figures of same name!
Conversion done by pattern matching, using no parser: warning, it could be
easily tricked into mistakes!
see http://www.w3.org/MarkUp/html3/mathsym.html

v. $version, by Nick Manini, $date
EOF
    return;
}

# _parse_command_line(@words): interpret the command-line words.
#   word not starting with "-" : input file name; appended to @inputfiles if
#                                the file exists, otherwise an error goes to
#                                STDERR and the word is skipped
#   -nf                        : sets $dofigures to 0 (no figures conversion)
#   -h                         : prints the help text and exits (status 0)
#   any other word             : prints the help text and an error message,
#                                then exits with status 1
# Returns nothing; works through the globals @inputfiles and $dofigures.
sub _parse_command_line {
    my @words = @_;

    # Loop on definedness, not truth: a word such as "0" must not end option
    # processing and silently drop the words that follow it.
    while (defined(my $p = shift @words)) {
        next if $p eq "";           # an empty word carries no information

        if ($p =~ /^[^\-]/) {       # filename unless starting with -
            if (-e $p) {
                push(@inputfiles, $p);
            }
            else {
                print STDERR "ERROR: file $p does not exist\n";
            }
            next;
        }
        if ($p eq "-nf") {          # [-nf] switch figure conversion off
            $dofigures = 0;
            next;
        }
        if ($p eq "-h") {           # [-h] print help
            print_help();
            exit;
        }

        print_help();
        print STDERR "ERROR: invalid option $p\n ";
        exit 1;                     # a usage error must not report success
    }
    return;
}

$dofigures=1;
_parse_command_line(splice(@ARGV));   # consumes @ARGV, like the former shift loop
if($#inputfiles<0){
    push(@inputfiles,"-");            # adds stdio for lack of arguments
}

# the following definitions should match the local site needs:
$localsiteURL='http://www.unimi.it/';
$localsiteLOGO='http://materia.fisica.unimi.it/manini/images/UniversitasMediolanensis.gif';
# NOTE(review): the next value looks like the remains of a link/logo snippet
# built from the two settings above -- confirm against a pristine copy.
$localsiteURLandlogo= ' local ';
$localgroupURL="http://eng.fisica.unimi.it/ecm/home/research/research-groups/physics-of-matter"; #old: http://www.fisica.unimi.it/"
$localauthorURL="http://materia.fisica.unimi.it/manini/";
$localimagefolder="images"; # e.g. "images" or "figs" or even ".", not ""!!
# ---------------------------------------------------------------------------
# Main script body: global tables, page header/footer, conversion loop.
#
# Set up here:
#   @imageextensions  image file extensions; the loop appends an upper-case
#                     copy of each entry
#   %repl             pairs of regex pattern => URL ("automatically inserted
#                     hyperlinks"); the code consuming it is not in this part
#                     of the file -- presumably addURLref, TODO confirm
#   $day/$month/$year obtained from the external `date` command
#   $header, $footer  page boilerplate; every "TiTlE" in $header is replaced
#                     by the document title before $header is printed
#
# Conversion loop, for each input file:
#   * input lines are stripped of %-comments and trailing blanks and are
#     accumulated in $line; a paragraph is processed when $nb==1, i.e. at the
#     first empty line after text or at \end{document}
#   * while $printok==0 the paragraph is searched for \title (or, failing
#     that, \begin{center}...\end{center}) to obtain $title; $header is then
#     printed and $printok is set to 1
#   * \bibliographystyle and \begin{thebibliography} / \begin{references}
#     switch normal output off; inside the reference list ($insideref) each
#     paragraph goes through latex2html and addURLref, and the text from the
#     third brace-delimited field onwards is printed as a list entry
#   * otherwise, while $printok is set: underscores in file=/figure= names, in
#     \label/\ref/\eqref arguments and in \_ are swapped for a placeholder
#     around the latex2html call; fig2html, epsfile2html (only if $dofigures),
#     table2html, addURLref and finalcleanups are then applied and the result
#     is printed
# After the last file the number of URL-ed references (if non-zero) and
# $footer are printed.
#
# NOTE(review): several things below look damaged rather than intended:
#   - "while ($_ = ){" has no read expression (presumably <INF>);
#   - many string literals ($header, $footer, the print statements) look as
#     if markup has been stripped from them;
#   - "s/ //g", commented as a fix for MS-DOS text files, removes blanks as
#     written (presumably a carriage return was meant);
#   - the return value of open(INF, $filename) is not checked.
#   Confirm against a pristine copy of the script before relying on this text.
# The last line of this span also opens "sub latex2html", which continues
# beyond it.
# ---------------------------------------------------------------------------
@imageextensions=("png","gif","jpg","jpeg","bmp"); # formats of images $n=$#imageextensions; for ($i=0;$i<=$n;$i++){ push(@imageextensions,uc $imageextensions[$i]); } $Converter="Nicola Manini"; $URLofConverter="http://materia.fisica.unimi.it/manini/"; $numberofURLedref=0; # automatically inserted hyperlinks (this list may be expanded at will): %repl=( "PCAM","http://www.pcam-doctorate.eu/", "E\\.T\\.S\\.F\\.|European Theoretical Spectroscopy Facility|e-I3-ETSF","http://www.etsf.eu/", "NANOQUANTA","https://www.cmt.york.ac.uk/nanoquanta/", "NFFA-Europe","https://www.nffa.eu/", "NFFA-EUROPE PILOT","https://cordis.europa.eu/project/id/101007417", "c\\+\\+","http://www.cplusplus.com/", "emacs","http://www.gnu.org/software/emacs/", "[lL]a[tT]e[xX]","http://www.latex-project.org/", "xmgrace","http://plasma-gate.weizmann.ac.il/Grace/", "gimp","http://www.gimp.org/", "xfig","http://www.xfig.org/", "libreoffice","http://www.libreoffice.org/", "perl","https://www.perl.org/", "python","https://www.python.org/", "google drive","https://en.wikipedia.org/wiki/Google_Drive", "[mM]athematica","http://www.wolfram.com/mathematica/", "mpi","http://mpi-forum.org/", "sed","https://en.wikipedia.org/wiki/Sed", "awk","https://en.wikipedia.org/wiki/AWK", "lapack","http://www.netlib.org/lapack/", "rawtherapee","http://rawtherapee.com/", "Numerical Recipes","http://numerical.recipes/", "UTFROM","https://sites.google.com/uniroma1.it/prin-utfrom/home-page", "CEFR","https://en.wikipedia.org/wiki/Common_European_Framework_of_Reference_for_Languages", "E\\.S\\.R\\.F\\.","http://www.esrf.eu/", "I\\.N\\.F\\.M\\.","https://it.wikipedia.org/wiki/Istituto_nazionale_per_la_fisica_della_materia", "SISSA|S\\.I\\.S\\.S\\.A\\.|I\\.S\\.A\\.S\\.","http://www.sissa.it/", "I\\.C\\.T\\.P\\.","http://www.ictp.it/", "Area Science Park","http://www.area.trieste.it/", "Physics Olympiads","http://ipho.org/", "Sincrotrone Trieste","http://www.elettra.trieste.it/", "European XFEL Project","http://xfel.desy.de/", 
"DESY","http://www.desy.de/", "Stanford Univ(\\.|ersity)","http://www.stanford.edu/", "Yale Univ(\\.|ersity)","https://www.yale.edu/", "Universit.* degli [sS]tudi di Trento|Trento University","http://www.unitn.it/", "Universit.* degli [sS]tudi di Milano|Milan University","http://www.unimi.it/", "The Electrochemical Society, Inc.","http://www.electrochem.org/", "(Oleg|O\\.) ?M\. Braun","http://www.iop.kiev.ua/~obraun/", "(Andrea|A\\.) Dal Corso","http://www.sissa.it/~dalcorso/", "(Paolo|P\\.) De Los Rios","http://people.epfl.ch/paolo.delosrios", "(Sebastian|S\\.) Doniach","http://www.stanford.edu/dept/app-physics/cgi-bin/person/doniach-sebastian/", "(Olle|O\\.) Gunnarsson","http://www2.fkf.mpg.de/andersen/users/gunnarsson/gunnarsson.html", "(Francesco|F\\.) Iachello","http://physics.yale.edu/people/francesco-iachello", "(Davide|D\\.E\\.) Galli","http://materia.fisica.unimi.it/~dgalli/", "(Enrico|E\\.) Gnecco","http://www.mfm.uni-jena.de/en/People/Prof_+Gnecco.html", "(Francesco|F\\.) Montalenti","https://www.unimib.it/francesco-cimbro-mattia-montalenti", "(Stefano|S\\.) Oss","http://www5.unitn.it/People/it/Web/Persona/PER0003759#INFO", "(Fabio|F\\.) Pistolesi","https://www.loma.cnrs.fr/fabio-pistolesi/", "(Luca|L\\.) Salasnich","http://materia.dfa.unipd.it/salasnich/", "(Alberto|A\\.) Parola","https://www.uninsubria.it/hpp/alberto.parola", "(Davide|D\\.) Pini","http://www0.mi.infn.it/~dpini/", "(Mario|M\\.) Scotoni","http://www.science.unitn.it/labfm/pmwiki/pmwiki.php?n=Scotoni.Scotoni", "(Erio|E\\.) Tosatti","https://sites.google.com/site/tosattierio/", "(Michael|M\\.) Urbakh","http://www.tau.ac.il/~urbakh1/", "(Andrea|A\\.) Vanossi","http://people.sissa.it/~vanossi/", "(Michel|M\\.) van Veenendaal","http://www.niu.edu/physics/directory/faculty/veenendaal.shtml", "(Lorenza|L\\.) Viola","http://www.dartmouth.edu/~physics/faculty/viola.html", "(Giovanni|G\\.) Onida","https://www.unimi.it/en/ugov/person/giovanni-onida", "(Stefano|S\\.) 
Zapperi","http://www.smmlab.it/people/cv/", "(Giuseppe|G\\.E\\.) Santoro","https://cm.sissa.it/people/members.php?ID=3", "(Theo|T\\.A\\.) Costi","http://iffwww.iff.kfa-juelich.de/~costi/t_costi.html" ); # start of the actual conversion: generation of suitable header and footer chomp($day=`date +%e`); # day of the month chomp($month=`date +%B`); # name of the month chomp($year=`date +%Y`); $header= ' TiTlE

TiTlE

'.$localsiteURLandlogo.'
<separation line>
'; $footer= '
<separation line>
automated conversion from LaTeX by convert2html v. '.$version.' ('.$date.');
conversion date: '.$day.' '.$month.' '.$year.'
'; $printok=0; $insideref=0; while ($filename=shift(@inputfiles)){ open(INF, $filename); # Open for input while ($_ = ){ # initial parser: paragraphs join->$line s/ //g; # for MS-dos/Win-spoiled txt files s/^%.*\n//; # comments away (entire line)! s/([^\\])%.*\n/\1 /; # comments away! s/\s+\n/\n/; # remove trailing blanks if(/^\n/){ # paragraph breaking line $nb++; } else{ $nb=0; s/\n/ /g; # unnecessary newlines removed $line=$line." ".$_; } if(/\\end\{document\}/){$nb++;} # print $_," ",$nb," QUIII\n"; if($nb==1){ $_=$line; if($printok==0){ # tentative construction of a smart parser to implement the \newcommand's # still in progress! [and failing] if(/\\newcommand\s*\{([\}]+)\}\s*\{([\}]*)\}/){ # $tmp=s/\\newcommand\s*\{([}]+)\}\s*\{([}]*)\}/\1|%|%|\2/g; @adds=split(/|%|%|/,$tmp); push(@repllist0,$adds[0]); push(@repllist1,$adds[1]); # print "quii "."@repllist0","xxx"."@repllist1"."\n"; } if(/\\title\W/){ # fixing of the title s/.*\\title/\\title/; $_=&latex2html($_); @lil=split(/[{}]/); if($#lil >= 1){ $title=$lil[1]; for($i=2;$i<=$#lil;++$i){ $title=$title.$lil[$i]; } } $header =~ s/TiTlE/${title}/g; print $header; $printok=1; $_=""; } if(/\\begin\{center\}/){ # fixing of the pseudo-title s/.*\\begin\{center\}/\\title{/; s/\\end\{center\}/}fInDuTiTrE\n/; @tmp=split(/fInDuTiTrE/); $_=$tmp[0]; $_=&latex2html($_); @lil=split(/[{}]/); if($#lil >= 1){ $title=$lil[1]; for($i=2;$i<=$#lil;++$i){ $title=$title.$lil[$i]; } } $header =~ s/TiTlE/${title}/g; print $header; $printok=1; $_=$tmp[1]; } } # print "QUII $printok $_\n"; if(/\\bibliographystyle/){$printok=0}; # references if(/\\begin\{thebibliography\}/ || /\\begin\{references\}/ ){ $printok=0; $insideref=1; print "

References

\n"; print "
    \n"; $_=""; } if(/\\end\{thebibliography\}/ || /\\end\{references\}/ ){ $printok=1; $insideref=0; print "
"; $_=""; } if($insideref){ $_= &latex2html($_); $_= &addURLref($_); @lil=split(/[{}]/); if($#lil >= 2) { $bib=""; for($i=2;$i<=$#lil;++$i) { $bib=$bib.$lil[$i]; } print "
  • \n ",$bib."\n"; } } if(/\\end\{document\}/){$printok=0;} if($printok){ # protect underscores in filenames: s/(file|figure)=([^,\}]*)_/\1=\2YeStHiSaTrUeUnDeRsCoRe/g; s/(file|figure)=([^,\}]*)_/\1=\2YeStHiSaTrUeUnDeRsCoRe/g; s/(file|figure)=([^,\}]*)_/\1=\2YeStHiSaTrUeUnDeRsCoRe/g; s/(file|figure)=([^,\}]*)_/\1=\2YeStHiSaTrUeUnDeRsCoRe/g; s/(file|figure)=([^,\}]*)_/\1=\2YeStHiSaTrUeUnDeRsCoRe/g; # protect underscores in labelings: s/\\(label|ref|eqref)\{([^\}]*)_/\\\1\{\2YeStHiSaTrUeUnDeRsCoRe/g; s/\\(label|ref|eqref)\{([^\}]*)_/\\\1\{\2YeStHiSaTrUeUnDeRsCoRe/g; # protect latex-protected underscores: s/\\_/YeStHiSaTrUeUnDeRsCoRe/g; $line = &latex2html($_); # print "QUIII".$line."\n"; $line =~ s/YeStHiSaTrUeUnDeRsCoRe/_/g; $line = &fig2html($line); if($dofigures){$line = &epsfile2html($line);} $line = &table2html($line); $line = &addURLref($line); $line = &finalcleanups($line); if($line ne ""){print $line,"\n

    \n"}; } $line = ""; } } close(INF); } if($numberofURLedref>0){ print "\n


    \n$numberofURLedref URL-ed refs\n"; } print $footer; ########################### end of main #################################### sub latex2html{ # simple syntactic converter # (not a parser: it can be easily tricked into mistakes!) local($_,@verbstring,$string); $_ = $_[0]; s/([^\\])&/\1TABsEpArAtOr/g; s//\>/g; # larger than s/\\rangle/\>/g; s/\\gg/\>\>/g; s/\\mid/|/g; s/{\s*}/ /g; # pointless empty curly brackets s/([^\\])\\\{/\1YeStHiSiSaLeFtCuRlYbRaCkEt/g;# protect true curly brackets s/([^\\])\\\}/\1YeStHiSiSaRiGhTcUrLyBrAcKeT/g; s/\\vspace[^\}]*\}//g; s/\\.?phantom[^\}]*\}//g; s/\\(over|under)brace//g; # don't know what to do here! s/\\(front|back)matter//g; s/\\tableofcontents//g; s/\\itemsep\s*[0-9]+pt//g; s/\\vfill//g; s/\\vfill//g; s/\\vskip\s*[0-9.]+\s*[a-z]+\s/


    \n/g; s/\\(\\|cr|newline)/
    \n/g; # linebreaks while(s/\\verb(.)(.*?)\1/ThIsStRiNgIsOnHoLd28<\/CODE>/ || s/(\\url)\{(.*?)\}/ThIsStRiNgIsOnHoLd28<\/CODE>/){ push(@verbstring,$2); } # fractions approximate treatment s/\\nicefrac/\\frac/g; s/\\frac\s*(1|\{1\})\s*(2|\{2\})/\½/g; # special fractions with html symbols s/\\frac\s*(1|\{1\})\s*(4|\{4\})/\¼/g; s/\\frac\s*(3|\{3\})\s*(4|\{4\})/\¾/g; # fractions with {}: s/\\frac\s*{(\w|\\\w+)}\s*{(\w|\\\w+)}/\1\/\2/g; # numerator and denominator are single symbols s/\\frac\s*{(\w|\\\w+)}\s*{([^\}]*)}/\1\/(\2)/g; # numerator is single symbol s/\\frac\s*{([^\}]*)}\s*{(\w|\\\w+)}/(\1)\/\2/g; # denominator is single symbol s/\\frac\s*{([^\}]*)}\s*{([^\}]*)}/(\1)\/(\2)/g; # fractions with some or all {} missing: s/\\frac\s*{([^\}]*)}\s*([^ \\]|\\[^ \\]*)/(\1)\/\2/g; # numerator is single symbol s/\\frac\s*([^ \\]|\\[^ \\]*)\s*{([^\}]*)}/\1\/(\2)/g; # denominator is single symbol s/\\frac\s*([^ \\]|\\[^ \\]*)\s*([^ \\]|\\[^ \\]*)/\1\/\2/g; # numerator and denominator are single symbols s/{([^\}]*)}\s*\\over\s*{([^\}]*)}/\1<\/sup>\/\2<\/sub>/g; s/([^ \\]|\\[^ \\]*)\s*\\over\s*{([^\}]*)}/\1<\/sup>\/\2<\/sub>/g; s/{([^\}]*)}\s*\\over\s*([^ \\]|\\[^ \\]*)/\1<\/sup>\/\2<\/sub>/g; s/([^ \\]|\\[^ \\]*)\s*\\over\s*([^ \\]|\\[^ \\]*)/\1<\/sup>\/\2<\/sub>/g; s/{([^\}]*)}\s*\\atop\s*{([^\}]*)}/\1 \/\/ \2/g; s/([^ \\]|\\[^ \\]*)\s*\\atop\s*{([^\}]*)}/\1 \/\/ \2/g; s/{([^\}]*)}\s*\\atop\s*([^ \\]|\\[^ \\]*)/\1 \/\/ \2/g; s/([^ \\]|\\[^ \\]*)\s*\\atop\s*([^ \\]|\\[^ \\]*)/\1 \/\/ \2/g; # below here start the character and calligraphic substitutions: # subscripts & superscripts: s/([_^])\{\\(bf|mathbf|bm)\s*\\rm\s+([^\}]*)\}/\1\{\3<\/B>\}/g; # both s/([_^])\{\\rm\s*\\(bf|mathbf|bm)\s+([^\}]*)\}/\1\{\3<\/B>\}/g; s/([^_^])\{\\(bf|mathbf|bm)\s+([^\}]+)\}/\1\3<\/B>/g; # boldfaces s/\\textbf\s*\{([^\}]+)\}/\1<\/B>/g; # boldface s/\\textbf\s*([^\{])/\1<\/B>/g; # boldface 1 char s/([_^])\{\\(bf|mathbf|bm)\s+([^\}]+)\}/\1\{\3<\/B>\}/g; s/([^_^])\{\\rm\s+([^\}]*)\}/\1\2<\/i>/g;# 
romans s/([_^])\\textrm\s*\{([^\}]*)}/\1\{\2<\/i>\}/g;# romans s/([_^])\\text\s*\{([^\}]*)}/\1\{\2<\/i>\}/g;# romans s/\\textrm\s*\{([^\}]*)\}/\1<\/i>/g;# romans s/\\text\s*\{([^\}]*)\}/\1<\/i>/g;# romans s/([_^])\{\\rm\s+([^\}]*)\}/\1\{\2<\/i>\}/g; s/([^_^])\{\\(mathcal|cal)\s+([^\}]*)\}/\1\3<\/CODE>/g;# cal s/([_^])\{\\(mathcal|cal)\s+([^\}]*)\}/\1\{\3<\/CODE>\}/g; s/~/\ /g; # ~ as a space s/\\onlinecite/\\cite/g; # further processed below s/\s\$([^\$]*)\$\s/\n\1<\/i>\n/g; # mathematical formulae s/\$([^\$]*)\$/\1<\/i>/g; # are rendered as italic s/\\begin\{(equation\*?|displaymath)\}/<\/p>\n

    \n/g; s/\\end\{(equation\*?|displaymath)\}/\n <\/i><\/p>\n

    /g; s/\\begin\{eqnarray\*?\}/\n

    \n\\begin{array}\n/g; s/\\end\{eqnarray\*?\}/\n\\end\{array\}\n <\/i><\/p>\n/g; s/\\begin\{align\*?\}/\n

    \n\\begin{array}[2]\n/g; s/\\end\{align\*?\}/\n\\end\{array\}\n <\/i><\/p>\n/g; s/\\begin\{description\}/\n