#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.sf.net
#
# Copyright 2001, 2002, 2003, 2004, 2005 Aurelio Marinho Jargas
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, version 2.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You have received a copy of the GNU General Public License along
#   with this program, on the COPYING file.
#
#
#
#   +-------------------------------------------------------------+
#   |               IMPORTANT MESSAGES, PLEASE READ               |
#   +-------------------------------------------------------------+
#   |                                                             |
#   |                                                             |
#   |    v1.x COMPATIBILITY                                       |
#   |    ------------------                                       |
#   |                                                             |
#   |    Due to the major syntax changes, the new 2.x series      |
#   |    BREAKS backwards compatibility.                          |
#   |                                                             |
#   |    Use the 't2tconv' script to upgrade your existing        |
#   |    v1.x files to conform to the new v2.x syntax.            |
#   |                                                             |
#   |    Do a visual inspection on the new converted file.        |
#   |    Especially Pre & Post proc filters can break.            |
#   |    Check them!                                              |
#   |                                                             |
#   |                                                             |
#   +-------------------------------------------------------------+
#
#
########################################################################
#
#   BORING CODE EXPLANATION AHEAD
#
# Just read if you wish to understand how the txt2tags code works
#
########################################################################
#
# Version 2.0 was a complete rewrite for the program 'core'.
#
# Now the code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
#
#   [1] made by: def convert()
#   [2] made by: class BlockMaster
#
# The structures of the marked text are identified and their contents
# are extracted into a data holder (Python lists and dictionaries).
#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Just when the block is closed, the target tags
# are inserted for the full block as a whole, in one pass. This way, we
# have a better control on blocks. Much better than the previous line by
# line approach.
#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the new XHTML target, but
# proved to be a very good addition, improving many other processings.
#
# -------------------------------------------------------------------
#
# There is also a brand new code for the Configuration schema, 100%
# rewritten. There are new classes, all self documented: CommandLine,
# SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
# Config format was created, and all kinds of configuration are first
# converted to this format, and then a generic method parses it.
#
# The init processing was changed also, and now the functions which
# get information about the input files are: get_infiles_config(),
# process_source_file() and convert_this_files()
#
# Other parts are untouched, and remain the same as in v1.7, such as
# the marks regexes, target Headers and target Tags&Rules.
#
########################################################################

# Now I think the code is nice, easier to read and understand

#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list instead newlist=list[:]
# - do NOT use newdic=dic   instead newdic=dic.copy()
# - do NOT use dic[key]     instead dic.get(key)
# - do NOT use del dic[key] without has_key() before

#XXX Smart Image Align don't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.

#XXX Paragraph separation not valid inside Quote
# Quote will not have <p></p> inside, instead will close and open
# again the <blockquote>. This really sux in CSS, when defining a
# different background color. Still don't know how to fix it.

#XXX TODO (maybe)
# New mark or macro which expands to an anchor full title.
# It is necessary to parse the full document in this order:
#  DONE  1st scan: HEAD: get all settings, including %!includeconf
#  DONE  2nd scan: BODY: expand includes & apply %!preproc
#        3rd scan: BODY: read titles and compose TOC info
#        4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be made together, with no tag adding.
# Two complete body scans will be *slow*, don't know if it is worth it.
# One solution may be add the titles as postproc rules


##############################################################################

# User config (1=ON, 0=OFF)

USE_I18N    = 1   # use gettext for i18ned messages?        (default is 1)
COLOR_DEBUG = 1   # show debug messages in colors?          (default is 1)
BG_LIGHT    = 0   # your terminal background color is light (default is 0)
HTML_LOWER  = 0   # use lowercased HTML tags instead upper? (default is 0)

##############################################################################


# these are all the core Python modules used by txt2tags (KISS!)
import re
import string
import os
import sys
import time
import getopt

# program information
my_url = 'http://txt2tags.sf.net'
my_name = 'txt2tags'
my_email = 'verde@aurelio.net'
my_version = '2.3'

# i18n - just use if available
# '_' is the message translation function. It is bound to the gettext
# catalog lookup when USE_I18N is on and the catalog can be loaded, and
# to a do-nothing function (returns its argument unchanged) otherwise.
if USE_I18N:
    try:
        import gettext
        # if your locale dir is different, change it here
        cat = gettext.translation('txt2tags', localedir='/usr/share/locale/')
        _ = cat.gettext
    except Exception:
        # Best effort: gettext is not importable or no usable 'txt2tags'
        # catalog was found, so fall back to untranslated messages.
        # Exception (not a bare except) lets Ctrl-C and sys.exit() through.
        _ = lambda x: x
else:
    _ = lambda x: x

# FLAGS   : the conversion related flags  , may be used in %!options
# OPTIONS : the conversion related options, may be used in %!options
# ACTIONS : the other behaviour modifiers, valid on command line only
# MACROS  : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
# NO_TARGET: actions that don't require a target specification
# NO_MULTI_INPUT: actions that don't accept more than one input file
# CONFIG_KEYWORDS: the valid %!key:val keywords
#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually have also a --no-