# coding=utf-8
"""Convert plain text into a small HTML fragment.

Markup-significant characters are escaped, line breaks become ``<br>``,
leading indentation becomes non-breaking spaces and bare ``http://`` /
``ftp://`` URLs become anchors.
"""
import re

try:
    # cgi.escape was removed in Python 3.8 and the cgi module in 3.13.
    from html import escape as _escape
except ImportError:
    # Python 2 has no html module; cgi.escape takes the same arguments.
    from cgi import escape as _escape

# A single alternation lets the whole conversion happen in one pass.
# Order matters: at any position the first alternative that matches wins, so
# indentation and line ends are recognised before the URL alternative.
#   htmlchars - characters that must be escaped in HTML text
#   space     - run of blanks/tabs at the start of a line (re.M)
#   lineend   - one line terminator; CRLF counts as a single break
#   protocal  - (historical spelling, kept for compatibility) optional
#               inline blanks (lead) followed by the URL itself (url)
#   tail      - the whitespace, or end of line/text, that ends the URL
re_string = re.compile(
    r'(?P<htmlchars>[<&>])'
    r'|(?P<space>^[ \t]+)'
    r'|(?P<lineend>\r\n|\r|\n)'
    r'|(?P<protocal>(?P<lead>^|[ \t]*)(?P<url>(http|ftp)://.*?))'
    r'(?P<tail>\r\n|\s|$)',
    re.S | re.M | re.I)

_LINE_ENDS = ('\n', '\r', '\r\n')


def text2html(text, tabstop=4):
    """Return *text* converted to an HTML fragment.

    Args:
        text: Plain text to convert.
        tabstop: Number of ``&nbsp;`` entities emitted for each tab found in
            leading indentation.

    Returns:
        A string in which ``<``, ``&`` and ``>`` are escaped, every line
        terminator is replaced by ``<br>``, blanks and tabs at the start of
        a line are replaced by ``&nbsp;`` entities, and each ``http://`` or
        ``ftp://`` URL is wrapped in ``<a href="...">...</a>``.
    """
    nbsp = '&nbsp;'

    def do_sub(m):
        c = m.groupdict()
        if c['htmlchars']:
            return _escape(c['htmlchars'], quote=False)
        if c['lineend']:
            return '<br>'
        if c['space']:
            # Expand tabs first so each one yields `tabstop` entities.
            return c['space'].replace('\t', nbsp * tabstop).replace(' ', nbsp)

        # Only the URL alternative is left.
        url = c['url']
        tail = c['tail']
        if tail in _LINE_ENDS:
            # The URL match consumed the line terminator, so the break has
            # to be emitted here rather than by the lineend alternative.
            tail = '<br>'
        # Escape the URL separately for attribute and text context so that
        # characters such as & or " cannot break out of the markup.
        return '%s<a href="%s">%s</a>%s' % (
            c['lead'],
            _escape(url, quote=True),
            _escape(url, quote=False),
            tail,
        )

    return re_string.sub(do_sub, text)


if __name__ == '__main__':
    text = """
http://groups.google.com/group/python-cn/pending
"""
    print(text2html(text))