From e2773dd1cd2621bdc2e0514ff66ea0a07aeea3e9 Mon Sep 17 00:00:00 2001 From: Arun Persaud Date: Mon, 15 Oct 2012 10:33:19 -0700 Subject: [PATCH 1/2] removed trailing whitespaces --- feedparser.py | 210 ++++++++++++++++++++-------------------- html2text.py | 96 +++++++++---------- readme.html | 8 +- rss2email.py | 240 +++++++++++++++++++++++----------------------- test_rss2email.py | 4 +- 5 files changed, 279 insertions(+), 279 deletions(-) diff --git a/feedparser.py b/feedparser.py index b9144a9..229e5d7 100644 --- a/feedparser.py +++ b/feedparser.py @@ -89,7 +89,7 @@ except (NameError, AttributeError): import string _maketrans = string.maketrans - + # base64 support for Atom feeds that contain embedded binary data try: import base64, binascii @@ -240,7 +240,7 @@ def search(self,string,index=0): if match is not None: # Returning a new object in the calling thread's context # resolves a thread-safety. - return EndBracketMatch(match) + return EndBracketMatch(match) return None class EndBracketMatch: def __init__(self, match): @@ -334,7 +334,7 @@ def setdefault(self, key, value): if not self.has_key(key): self[key] = value return self[key] - + def has_key(self, key): try: return hasattr(self, key) or UserDict.__contains__(self, key) @@ -343,7 +343,7 @@ def has_key(self, key): # This alias prevents the 2to3 tool from changing the semantics of the # __contains__ function below and exhausting the maximum recursion depth __has_key = has_key - + def __getattr__(self, key): try: return self.__dict__[key] @@ -398,7 +398,7 @@ def _ebcdic_to_ascii(s): _ebcdic_to_ascii_map = _maketrans( \ _l2bytes(range(256)), _l2bytes(emap)) return s.translate(_ebcdic_to_ascii_map) - + _cp1252 = { unichr(128): unichr(8364), # euro sign unichr(130): unichr(8218), # single low-9 quotation mark @@ -451,7 +451,7 @@ class _FeedParserMixin: 'http://purl.org/atom/ns#': '', 'http://www.w3.org/2005/Atom': '', 'http://purl.org/rss/1.0/modules/rss091#': '', - + 'http://webns.net/mvcb/': 'admin', 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', 'http://purl.org/rss/1.0/modules/annotate/': 'annotate', @@ -508,7 +508,7 @@ class _FeedParserMixin: can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] html_types = ['text/html', 'application/xhtml+xml'] - + def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): if _debug: sys.stderr.write('initializing FeedParser\n') if not self._matchnamespaces: @@ -554,7 +554,7 @@ def unknown_starttag(self, tag, attrs): # strict xml parsers do -- account for this difference if isinstance(self, _LooseFeedParser): attrs = [(k, v.replace('&', '&')) for k, v in attrs] - + # track xml:base and xml:lang attrsD = dict(attrs) baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri @@ -582,7 +582,7 @@ def unknown_starttag(self, tag, attrs): self.lang = lang self.basestack.append(self.baseuri) self.langstack.append(lang) - + # track namespaces for prefix, uri in attrs: if prefix.startswith('xmlns:'): @@ -620,7 +620,7 @@ def unknown_starttag(self, tag, attrs): self.intextinput = 0 if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): self.inimage = 0 - + # call special handler (if defined) or default handler methodname = '_start_' + prefix + suffix try: @@ -754,7 +754,7 @@ def mapContentType(self, contentType): elif contentType == 
'xhtml': contentType = 'application/xhtml+xml' return contentType - + def trackNamespace(self, prefix, uri): loweruri = uri.lower() if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: @@ -775,7 +775,7 @@ def trackNamespace(self, prefix, uri): def resolveURI(self, uri): return _urljoin(self.baseuri or '', uri) - + def decodeEntities(self, element, data): return data @@ -788,7 +788,7 @@ def push(self, element, expectingText): def pop(self, element, stripWhitespace=1): if not self.elementstack: return if self.elementstack[-1][0] != element: return - + element, expectingText, pieces = self.elementstack.pop() if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': @@ -833,11 +833,11 @@ def pop(self, element, stripWhitespace=1): # In Python 3, base64 takes and outputs bytes, not str # This may not be the most correct way to accomplish this output = _base64decode(output.encode('utf-8')).decode('utf-8') - + # resolve relative URIs if (element in self.can_be_relative_uri) and output: output = self.resolveURI(output) - + # decode entities within embedded markup if not self.contentparams.get('base64', 0): output = self.decodeEntities(element, output) @@ -860,7 +860,7 @@ def pop(self, element, stripWhitespace=1): if is_htmlish and RESOLVE_RELATIVE_URIS: if element in self.can_contain_relative_uris: output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) - + # parse microformats # (must do this before sanitizing because some microformats # rely on elements that we sanitize) @@ -876,7 +876,7 @@ def pop(self, element, stripWhitespace=1): vcard = mfresults.get('vcard') if vcard: self._getContext()['vcard'] = vcard - + # sanitize embedded markup if is_htmlish and SANITIZE_HTML: if element in self.can_contain_dangerous_markup: @@ -906,7 +906,7 @@ def pop(self, element, stripWhitespace=1): if element == 'title' and self.hasTitle: return output - + # store output in appropriate place(s) if self.inentry and not self.insource: if element == 'content': @@ -962,7 +962,7 @@ def popContent(self, tag): self.incontent -= 1 self.contentparams.clear() return value - + # a number of elements in a number of RSS variants are nominally plain # text, but this is routinely ignored. This is an attempt to detect # the most common cases. 
As false positives often result in silent @@ -993,7 +993,7 @@ def _mapToStandardPrefix(self, name): prefix = self.namespacemap.get(prefix, prefix) name = prefix + ':' + suffix return name - + def _getAttribute(self, attrsD, name): return attrsD.get(self._mapToStandardPrefix(name)) @@ -1021,7 +1021,7 @@ def _itsAnHrefDamnIt(self, attrsD): pass attrsD['href'] = href return attrsD - + def _save(self, key, value, overwrite=False): context = self._getContext() if overwrite: @@ -1046,7 +1046,7 @@ def _start_rss(self, attrsD): self.version = 'rss20' else: self.version = 'rss' - + def _start_dlhottitles(self, attrsD): self.version = 'hotrss' @@ -1064,7 +1064,7 @@ def _cdf_common(self, attrsD): self._start_link({}) self.elementstack[-1][-1] = attrsD['href'] self._end_link() - + def _start_feed(self, attrsD): self.infeed = 1 versionmap = {'0.1': 'atom01', @@ -1081,7 +1081,7 @@ def _start_feed(self, attrsD): def _end_channel(self): self.infeed = 0 _end_feed = _end_channel - + def _start_image(self, attrsD): context = self._getContext() if not self.inentry: @@ -1089,7 +1089,7 @@ def _start_image(self, attrsD): self.inimage = 1 self.hasTitle = 0 self.push('image', 0) - + def _end_image(self): self.pop('image') self.inimage = 0 @@ -1101,7 +1101,7 @@ def _start_textinput(self, attrsD): self.hasTitle = 0 self.push('textinput', 0) _start_textInput = _start_textinput - + def _end_textinput(self): self.pop('textinput') self.intextinput = 0 @@ -1301,7 +1301,7 @@ def _end_subtitle(self): self.popContent('subtitle') _end_tagline = _end_subtitle _end_itunes_subtitle = _end_subtitle - + def _start_rights(self, attrsD): self.pushContent('rights', attrsD, 'text/plain', 1) _start_dc_rights = _start_rights @@ -1399,7 +1399,7 @@ def _start_cc_license(self, attrsD): attrsD['rel']='license' if value: attrsD['href']=value context.setdefault('links', []).append(attrsD) - + def _start_creativecommons_license(self, attrsD): self.push('license', 1) _start_creativeCommons_license = _start_creativecommons_license @@ -1420,7 +1420,7 @@ def _addXFN(self, relationships, href, name): value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name}) if value not in xfn: xfn.append(value) - + def _addTag(self, term, scheme, label): context = self._getContext() tags = context.setdefault('tags', []) @@ -1438,7 +1438,7 @@ def _start_category(self, attrsD): self.push('category', 1) _start_dc_subject = _start_category _start_keywords = _start_category - + def _start_media_category(self, attrsD): attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') self._start_category(attrsD) @@ -1446,11 +1446,11 @@ def _start_media_category(self, attrsD): def _end_itunes_keywords(self): for term in self.pop('itunes_keywords').split(): self._addTag(term, 'http://www.itunes.com/', None) - + def _start_itunes_category(self, attrsD): self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) self.push('category', 1) - + def _end_category(self): value = self.pop('category') if not value: return @@ -1467,7 +1467,7 @@ def _end_category(self): def _start_cloud(self, attrsD): self._getContext()['cloud'] = FeedParserDict(attrsD) - + def _start_link(self, attrsD): attrsD.setdefault('rel', 'alternate') if attrsD['rel'] == 'self': @@ -1568,7 +1568,7 @@ def _end_generator(self): context = self._getContext() if context.has_key('generator_detail'): context['generator_detail']['name'] = value - + def _start_admin_generatoragent(self, attrsD): self.push('generator', 1) value = self._getAttribute(attrsD, 'rdf:resource') @@ 
-1583,7 +1583,7 @@ def _start_admin_errorreportsto(self, attrsD): if value: self.elementstack[-1][2].append(value) self.pop('errorreportsto') - + def _start_summary(self, attrsD): context = self._getContext() if context.has_key('summary'): @@ -1601,13 +1601,13 @@ def _end_summary(self): self.popContent(self._summaryKey or 'summary') self._summaryKey = None _end_itunes_summary = _end_summary - + def _start_enclosure(self, attrsD): attrsD = self._itsAnHrefDamnIt(attrsD) context = self._getContext() attrsD['rel']='enclosure' context.setdefault('links', []).append(FeedParserDict(attrsD)) - + def _start_source(self, attrsD): if 'url' in attrsD: # This means that we're processing a source element from an RSS 2.0 feed @@ -1659,7 +1659,7 @@ def _start_itunes_image(self, attrsD): if attrsD.get('href'): self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) _start_itunes_link = _start_itunes_image - + def _end_itunes_block(self): value = self.pop('itunes_block', 0) self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 @@ -1718,12 +1718,12 @@ def __init__(self, baseuri, baselang, encoding): self.bozo = 0 self.exc = None self.decls = {} - + def startPrefixMapping(self, prefix, uri): self.trackNamespace(prefix, uri) if uri == 'http://www.w3.org/1999/xlink': self.decls['xmlns:'+prefix] = uri - + def startElementNS(self, name, qname, attrs): namespace, localname = name lowernamespace = str(namespace or '').lower() @@ -1805,7 +1805,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser): special = re.compile('''[<>'"]''') bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") elements_no_end_tag = [ - 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', + 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ] @@ -1837,7 +1837,7 @@ def parse_starttag(self,i): def feed(self, data): data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace - data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) + data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) data = data.replace(''', "'") data = data.replace('"', '"') try: @@ -1910,7 +1910,7 @@ def handle_charref(self, ref): self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) else: self.pieces.append('&#%(ref)s;' % locals()) - + def handle_entityref(self, ref): # called for each entity reference, e.g. for '©', ref will be 'copy' # Reconstruct the original entity reference. @@ -1925,12 +1925,12 @@ def handle_data(self, text): # Store the original text verbatim. if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) self.pieces.append(text) - + def handle_comment(self, text): # called for each HTML comment, e.g. # Reconstruct the original comment. self.pieces.append('' % locals()) - + def handle_pi(self, text): # called for each processing instruction, e.g. # Reconstruct original processing instruction. 
@@ -1942,7 +1942,7 @@ def handle_decl(self, text): # "http://www.w3.org/TR/html4/loose.dtd"> # Reconstruct original DOCTYPE self.pieces.append('' % locals()) - + _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match def _scan_name(self, i, declstartpos): rawdata = self.rawdata @@ -2006,7 +2006,7 @@ def decodeEntities(self, element, data): data = data.replace('"', '"') data = data.replace(''', "'") return data - + def strattrs(self, attrs): return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) @@ -2030,12 +2030,12 @@ def __init__(self, data, baseuri, encoding): self.enclosures = [] self.xfn = [] self.vcard = None - + def vcardEscape(self, s): if type(s) in (type(''), type(u'')): s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') return s - + def vcardFold(self, s): s = re.sub(';+$', '', s) sFolded = '' @@ -2051,14 +2051,14 @@ def vcardFold(self, s): def normalize(self, s): return re.sub(r'\s+', ' ', s).strip() - + def unique(self, aList): results = [] for element in aList: if element not in results: results.append(element) return results - + def toISO8601(self, dt): return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) @@ -2148,21 +2148,21 @@ def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0 def findVCards(self, elmRoot, bAgentParsing=0): sVCards = '' - + if not bAgentParsing: arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) else: arCards = [elmRoot] - + for elmCard in arCards: arLines = [] - + def processSingleString(sProperty): sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) if sValue: arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) return sValue or u'' - + def processSingleURI(sProperty): sValue = self.getPropertyValue(elmCard, sProperty, self.URI) if sValue: @@ -2185,7 +2185,7 @@ def processSingleURI(sProperty): if sContentType: sContentType = ';TYPE=' + sContentType.upper() arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) - + def processTypeValue(sProperty, arDefaultType, arForceType=None): arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) for elmResult in arResults: @@ -2197,7 +2197,7 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) if sValue: arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) - + # AGENT # must do this before all other properties because it is destructive # (removes nested class="vcard" nodes so they don't interfere with @@ -2216,10 +2216,10 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); if sAgentValue: arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) - + # FN (full name) sFN = processSingleString('fn') - + # N (name) elmName = self.getPropertyValue(elmCard, 'n') if elmName: @@ -2228,7 +2228,7 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) arHonorificSuffixes = self.getPropertyValue(elmName, 'honorific-suffix', self.STRING, 1, 1) + 
self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) - arLines.append(self.vcardFold('N:' + sFamilyName + ';' + + arLines.append(self.vcardFold('N:' + sFamilyName + ';' + sGivenName + ';' + ','.join(arAdditionalNames) + ';' + ','.join(arHonorificPrefixes) + ';' + @@ -2245,25 +2245,25 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) else: arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) - + # SORT-STRING sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) if sSortString: arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) - + # NICKNAME arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) if arNickname: arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) - + # PHOTO processSingleURI('photo') - + # BDAY dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) if dtBday: arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) - + # ADR (address) arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) for elmAdr in arAdr: @@ -2285,38 +2285,38 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): sRegion + ';' + sPostalCode + ';' + sCountryName)) - + # LABEL processTypeValue('label', ['intl','postal','parcel','work']) - + # TEL (phone number) processTypeValue('tel', ['voice']) - + # EMAIL processTypeValue('email', ['internet'], ['internet']) - + # MAILER processSingleString('mailer') - + # TZ (timezone) processSingleString('tz') - + # GEO (geographical information) elmGeo = self.getPropertyValue(elmCard, 'geo') if elmGeo: sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) - + # TITLE processSingleString('title') - + # ROLE processSingleString('role') # LOGO processSingleURI('logo') - + # ORG (organization) elmOrg = self.getPropertyValue(elmCard, 'org') if elmOrg: @@ -2330,39 +2330,39 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): else: arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) - + # CATEGORY arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) if arCategory: arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) - + # NOTE processSingleString('note') - + # REV processSingleString('rev') - + # SOUND processSingleURI('sound') - + # UID processSingleString('uid') - + # URL processSingleURI('url') - + # CLASS processSingleString('class') - + # KEY processSingleURI('key') - + if arLines: arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] sVCards += u'\n'.join(arLines) + u'\n' - + return sVCards.strip() - + def isProbablyDownloadable(self, elm): attrsD = elm.attrMap if not attrsD.has_key('href'): return 0 @@ -2461,7 +2461,7 @@ def __init__(self, baseuri, encoding, _type): def resolveURI(self, uri): return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) - + def unknown_starttag(self, tag, attrs): if _debug: sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, str(attrs))) @@ -2575,7 +2575,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor): # svgtiny - foreignObject + linearGradient + radialGradient + 
stop svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', - 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', + 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] @@ -2621,7 +2621,7 @@ def reset(self): self.unacceptablestack = 0 self.mathmlOK = 0 self.svgOK = 0 - + def unknown_starttag(self, tag, attrs): acceptable_attributes = self.acceptable_attributes keymap = {} @@ -2683,7 +2683,7 @@ def unknown_starttag(self, tag, attrs): clean_value = self.sanitize_style(value) if clean_value: clean_attrs.append((key,clean_value)) _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) - + def unknown_endtag(self, tag): if not tag in self.acceptable_elements: if tag in self.unacceptable_elements_with_end_tag: @@ -2815,7 +2815,7 @@ def http_error_301(self, req, fp, code, msg, headers): http_error_300 = http_error_302 http_error_303 = http_error_302 http_error_307 = http_error_302 - + def http_error_401(self, req, fp, code, msg, headers): # Check if # - server requires digest auth, AND @@ -2914,7 +2914,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h return opener.open(request) finally: opener.close() # JohnD - + # try to open with native open function (if url_file_stream_or_string is a filename) try: return open(url_file_stream_or_string, 'rb') @@ -2966,7 +2966,7 @@ def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_h def registerDateHandler(func): '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' _date_handlers.insert(0, func) - + # ISO-8601 date parsing routines written by Fazal Majid. # The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 # parser is beyond the scope of feedparser and would be a worthwhile addition @@ -2977,7 +2977,7 @@ def registerDateHandler(func): # Please note the order in templates is significant because we need a # greedy match. _iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', - 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', + 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', '-YY-?MM', '-OOO', '-YY', '--MM-?DD', '--MM', '---DD', @@ -3079,7 +3079,7 @@ def _parse_date_iso8601(dateString): # Many implementations have bugs, but we'll pretend they don't. return time.localtime(time.mktime(tuple(tm))) registerDateHandler(_parse_date_iso8601) - + # 8-bit date handling routines written by ytrewq1. _korean_year = u'\ub144' # b3e2 in euc-kr _korean_month = u'\uc6d4' # bff9 in euc-kr @@ -3170,7 +3170,7 @@ def _parse_date_mssql(dateString): u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 - u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 + u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 } _greek_date_format_re = \ @@ -3360,7 +3360,7 @@ def _parse_date_rfc822(dateString): # 'ET' is equivalent to 'EST', etc. 
_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} rfc822._timezones.update(_additional_timezones) -registerDateHandler(_parse_date_rfc822) +registerDateHandler(_parse_date_rfc822) def _parse_date_perforce(aDateString): """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" @@ -3398,7 +3398,7 @@ def _getCharacterEncoding(http_headers, xml_data): http_headers is a dictionary xml_data is a raw string (not Unicode) - + This is so much trickier than it sounds, it's not even funny. According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type is application/xml, application/*+xml, @@ -3417,12 +3417,12 @@ def _getCharacterEncoding(http_headers, xml_data): served with a Content-Type of text/* and no charset parameter must be treated as us-ascii. (We now do this.) And also that it must always be flagged as non-well-formed. (We now do this too.) - + If Content-Type is unspecified (input was local file or non-HTTP source) or unrecognized (server just got it totally wrong), then go by the encoding given in the XML prefix of the document and default to 'iso-8859-1' as per the HTTP specification (RFC 2616). - + Then, assuming we didn't find a character encoding in the HTTP headers (and the HTTP Content-type allowed us to look in the body), we need to sniff the first few bytes of the XML data and try to determine @@ -3532,7 +3532,7 @@ def _parseHTTPContentType(content_type): if true_encoding.lower() == 'gb2312': true_encoding = 'gb18030' return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type - + def _toUTF8(data, encoding): '''Changes an XML data stream on the fly to specify a new encoding @@ -3595,7 +3595,7 @@ def _stripDoctype(data): start = re.search(_s2bytes('<\w'), data) start = start and start.start() or -1 head,data = data[:start+1], data[start+1:] - + entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) entity_results=entity_pattern.findall(head) head = entity_pattern.sub(_s2bytes(''), head) @@ -3617,10 +3617,10 @@ def _stripDoctype(data): data = doctype_pattern.sub(replacement, head) + data return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) - + def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): '''Parse a feed from a URL, file, stream, or string. - + request_headers, if given, is a dict from http header name to value to add to the request; this overrides internally generated values. 
''' @@ -3861,7 +3861,7 @@ def _writer(self, stream, node, prefix): stream.write('\n') except: pass - + class PprintSerializer(Serializer): def write(self, stream=sys.stdout): if self.results.has_key('href'): @@ -3869,7 +3869,7 @@ def write(self, stream=sys.stdout): from pprint import pprint pprint(self.results, stream) stream.write('\n') - + if __name__ == '__main__': try: from optparse import OptionParser diff --git a/html2text.py b/html2text.py index 0ed4cec..97caa8c 100644 --- a/html2text.py +++ b/html2text.py @@ -59,13 +59,13 @@ def name2cp(k): if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1 return ord(codecs.latin_1_decode(k)[0]) -unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', +unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', 'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*', 'ndash':'-', 'oelig':'oe', 'aelig':'ae', -'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', -'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', 'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', -'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', 'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'} unifiable_n = {} @@ -78,7 +78,7 @@ def charref(name): c = int(name[1:], 16) else: c = int(name) - + if not UNICODE_SNOB and c in unifiable_n.keys(): return unifiable_n[c] else: @@ -101,7 +101,7 @@ def entityref(c): def replaceEntities(s): s = s.group(1) - if s[0] == "#": + if s[0] == "#": return charref(s[1:]) else: return entityref(s) @@ -122,7 +122,7 @@ def optwrap(text): """Wrap all paragraphs in the provided text.""" if not BODY_WIDTH: return text - + assert wrap, "Requires Python 2.3." 
result = '' newlines = 0 @@ -153,7 +153,7 @@ def hn(tag): class _html2text(HTMLParser.HTMLParser): def __init__(self, out=None, baseurl=''): HTMLParser.HTMLParser.__init__(self) - + if out is None: self.out = self.outtextf else: self.out = out try: @@ -177,43 +177,43 @@ def __init__(self, out=None, baseurl=''): self.abbr_data = None # last inner HTML (for abbr being defined) self.abbr_list = {} # stack of abbreviations to write later self.baseurl = baseurl - - def outtextf(self, s): + + def outtextf(self, s): self.outtext += s - + def close(self): HTMLParser.HTMLParser.close(self) - + self.pbr() self.o('', 0, 'end') - + return self.outtext - + def handle_charref(self, c): self.o(charref(c)) def handle_entityref(self, c): self.o(entityref(c)) - + def handle_starttag(self, tag, attrs): self.handle_tag(tag, attrs, 1) - + def handle_endtag(self, tag): self.handle_tag(tag, None, 0) - + def previousIndex(self, attrs): """ returns the index of certain set of attributes (of a link) in the self.a list - + If the set of attributes is not found, returns None """ if not has_key(attrs, 'href'): return None - + i = -1 for a in self.a: i += 1 match = 0 - + if has_key(a, 'href') and a['href'] == attrs['href']: if has_key(a, 'title') or has_key(attrs, 'title'): if (has_key(a, 'title') and has_key(attrs, 'title') and @@ -226,13 +226,13 @@ def previousIndex(self, attrs): def handle_tag(self, tag, attrs, start): #attrs = fixattrs(attrs) - + if hn(tag): self.p() if start: self.o(hn(tag)*"#" + ' ') if tag in ['p', 'div']: self.p() - + if tag == "br" and start: self.o(" \n") if tag == "hr" and start: @@ -240,21 +240,21 @@ def handle_tag(self, tag, attrs, start): self.o("* * *") self.p() - if tag in ["head", "style", 'script']: + if tag in ["head", "style", 'script']: if start: self.quiet += 1 else: self.quiet -= 1 if tag in ["body"]: self.quiet = 0 # sites like 9rules.com never close - + if tag == "blockquote": - if start: + if start: self.p(); self.o('> ', 0, 1); self.start = 1 self.blockquote += 1 else: self.blockquote -= 1 self.p() - + if tag in ['em', 'i', 'u']: self.o("_") if tag in ['strong', 'b']: self.o("**") if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` @@ -263,7 +263,7 @@ def handle_tag(self, tag, attrs, start): attrsD = {} for (x, y) in attrs: attrsD[x] = y attrs = attrsD - + self.abbr_title = None self.abbr_data = '' if has_key(attrs, 'title'): @@ -273,13 +273,13 @@ def handle_tag(self, tag, attrs, start): self.abbr_list[self.abbr_data] = self.abbr_title self.abbr_title = None self.abbr_data = '' - + if tag == "a": if start: attrsD = {} for (x, y) in attrs: attrsD[x] = y attrs = attrsD - if has_key(attrs, 'href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + if has_key(attrs, 'href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): self.astack.append(attrs) self.o("[") else: @@ -297,7 +297,7 @@ def handle_tag(self, tag, attrs, start): a['outcount'] = self.outcount self.a.append(a) self.o("][" + str(a['count']) + "]") - + if tag == "img" and start: attrsD = {} for (x, y) in attrs: attrsD[x] = y @@ -316,20 +316,20 @@ def handle_tag(self, tag, attrs, start): self.o("![") self.o(alt) self.o("]["+ str(attrs['count']) +"]") - + if tag == 'dl' and start: self.p() if tag == 'dt' and not start: self.pbr() if tag == 'dd' and start: self.o(' ') if tag == 'dd' and not start: self.pbr() - + if tag in ["ol", "ul"]: if start: self.list.append({'name':tag, 'num':0}) else: if self.list: self.list.pop() - + self.p() - + if tag == 'li': if start: self.pbr() @@ 
-343,10 +343,10 @@ def handle_tag(self, tag, attrs, start): self.start = 1 else: self.pbr() - + if tag in ["table", "tr"] and start: self.p() if tag == 'td': self.pbr() - + if tag == "pre": if start: self.startpre = 1 @@ -354,34 +354,34 @@ def handle_tag(self, tag, attrs, start): else: self.pre = 0 self.p() - + def pbr(self): if self.p_p == 0: self.p_p = 1 def p(self): self.p_p = 2 - + def o(self, data, puredata=0, force=0): if self.abbr_data is not None: self.abbr_data += data - - if not self.quiet: + + if not self.quiet: if puredata and not self.pre: data = re.sub('\s+', ' ', data) if data and data[0] == ' ': self.space = 1 data = data[1:] if not data and not force: return - + if self.startpre: #self.out(" :") #TODO: not output when already one there self.startpre = 0 - + bq = (">" * self.blockquote) if not (force and data and data[0] == ">") and self.blockquote: bq += " " - + if self.pre: bq += " " data = data.replace("\n", "\n"+bq) - + if self.start: self.space = 0 self.p_p = 0 @@ -397,7 +397,7 @@ def o(self, data, puredata=0, force=0): if self.p_p: self.out(('\n'+bq)*self.p_p) self.space = 0 - + if self.space: if not self.lastWasNL: self.out(' ') self.space = 0 @@ -408,7 +408,7 @@ def o(self, data, puredata=0, force=0): newa = [] for link in self.a: if self.outcount > link['outcount']: - self.out(" ["+ str(link['count']) +"]: " + urlparse.urljoin(self.baseurl, link['href'])) + self.out(" ["+ str(link['count']) +"]: " + urlparse.urljoin(self.baseurl, link['href'])) if has_key(link, 'title'): self.out(" ("+link['title']+")") self.out("\n") else: @@ -417,7 +417,7 @@ def o(self, data, puredata=0, force=0): if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. self.a = newa - + if self.abbr_list and force == "end": for abbr, definition in self.abbr_list.items(): self.out(" *[" + abbr + "]: " + definition + "\n") @@ -430,7 +430,7 @@ def o(self, data, puredata=0, force=0): def handle_data(self, data): if r'\/script>' in data: self.quiet -= 1 self.o(data, 1) - + def unknown_decl(self, data): pass def wrapwrite(text): sys.stdout.write(text) diff --git a/readme.html b/readme.html index 224fbbe..e00a253 100644 --- a/readme.html +++ b/readme.html @@ -23,7 +23,7 @@

Download

  • Create a new folder
  • Download the latest rss2email .ZIP file and unzip to the new folder
-
+

    Configure

    Edit the config.py file and fill in your outgoing email server's details. If your server requires you to log in, change "AUTHREQUIRED = 0" to "AUTHREQUIRED = 1" and enter your email username and password.

    @@ -157,7 +157,7 @@

    Customize rss2email

    DATE_HEADER = 1

    - + - - \ No newline at end of file + + diff --git a/rss2email.py b/rss2email.py index 9fd2426..9d08eaa 100644 --- a/rss2email.py +++ b/rss2email.py @@ -18,9 +18,9 @@ __version__ = "2.72" __author__ = "Lindsey Smith (lindsey@allthingsrss.com)" __copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2 or 3." -___contributors__ = ["Dean Jackson", "Brian Lalor", "Joey Hess", - "Matej Cepl", "Martin 'Joey' Schulze", - "Marcel Ackermann (http://www.DreamFlasher.de)", +___contributors__ = ["Dean Jackson", "Brian Lalor", "Joey Hess", + "Matej Cepl", "Martin 'Joey' Schulze", + "Marcel Ackermann (http://www.DreamFlasher.de)", "Lindsey Smith (maintainer)", "Erik Hetzner", "Aaron Swartz (original author)" ] import urllib2 @@ -50,17 +50,17 @@ # A tuple consisting of some combination of # ('issued', 'created', 'modified', 'expired') -# expressing ordered list of preference in dates +# expressing ordered list of preference in dates # to use for the Date header of the email. DATE_HEADER_ORDER = ('modified', 'issued', 'created') # 1: Apply Q-P conversion (required for some MUAs). # 0: Send message in 8-bits. # http://cr.yp.to/smtp/8bitmime.html -#DEPRECATED +#DEPRECATED QP_REQUIRED = 0 -#DEPRECATED - +#DEPRECATED + # 1: Name feeds as they're being processed. # 0: Keep quiet. VERBOSE = 0 @@ -114,20 +114,20 @@ from email.MIMEText import MIMEText from email.Header import Header from email.Utils import parseaddr, formataddr - + # Note: You can also override the send function. def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None): """Send an email. - + All arguments should be Unicode strings (plain ASCII works as well). - + Only the real name part of sender and recipient addresses may contain non-ASCII characters. - + The email will be properly MIME encoded and delivered though SMTP to localhost port 25. This is easy to change if you want something different. - + The charset of the email will be the first one out of the list that can represent all the characters occurring in the email. """ @@ -135,7 +135,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps # Header class is smart enough to try US-ASCII, then the charset we # provide, then fall back to UTF-8. header_charset = 'ISO-8859-1' - + # We must choose the body charset manually for body_charset in CHARSET_LIST: try: @@ -148,16 +148,16 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps # Split real name (which is optional) and email address parts sender_name, sender_addr = parseaddr(sender) recipient_name, recipient_addr = parseaddr(recipient) - + # We must always pass Unicode strings to Header, otherwise it will # use RFC 2047 encoding even on plain ASCII strings. 
sender_name = str(Header(unicode(sender_name), header_charset)) recipient_name = str(Header(unicode(recipient_name), header_charset)) - + # Make sure email addresses do not contain non-ASCII characters sender_addr = sender_addr.encode('ascii') recipient_addr = recipient_addr.encode('ascii') - + # Create the message ('plain' stands for Content-Type: text/plain) msg = MIMEText(body.encode(body_charset), contenttype, body_charset) msg['To'] = formataddr((recipient_name, recipient_addr)) @@ -167,7 +167,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps msg[hdr] = Header(unicode(extraheaders[hdr], header_charset)) except: msg[hdr] = Header(extraheaders[hdr]) - + fromhdr = formataddr((sender_name, sender_addr)) msg['From'] = fromhdr @@ -178,9 +178,9 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps #DEPRECATED msg_as_string = outs.getvalue() if SMTP_SEND: - if not smtpserver: + if not smtpserver: import smtplib - + try: if SMTP_SSL: smtpserver = smtplib.SMTP_SSL() @@ -196,7 +196,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps if hasattr(e, 'reason'): print >>warn, "Reason:", e.reason sys.exit(1) - + if AUTHREQUIRED: try: smtpserver.ehlo() @@ -212,7 +212,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps if hasattr(e, 'reason'): print >>warn, "Reason:", e.reason sys.exit(1) - + smtpserver.sendmail(sender, recipient, msg_as_string) return smtpserver @@ -262,7 +262,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps pass warn = sys.stderr - + if QP_REQUIRED: print >>warn, "QP_REQUIRED has been deprecated in rss2email." @@ -280,18 +280,18 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps unix = 0 try: import fcntl -# A pox on SunOS file locking methods - if (sys.platform.find('sunos') == -1): +# A pox on SunOS file locking methods + if (sys.platform.find('sunos') == -1): unix = 1 except: pass - + import socket; socket_errors = [] for e in ['error', 'gaierror']: if hasattr(socket, e): socket_errors.append(getattr(socket, e)) -#DEPRECATED import mimify -#DEPRECATED from StringIO import StringIO as SIO +#DEPRECATED import mimify +#DEPRECATED from StringIO import StringIO as SIO #DEPRECATED mimify.CHARSET = 'utf-8' import feedparser @@ -325,13 +325,13 @@ def __init__(self): threading.Thread.__init__(self) self.result = None self.error = None - + def run(self): try: self.result = function(*args, **kw) except: self.error = sys.exc_info() - + c = Calculator() c.setDaemon(True) # don't hold up exiting c.start() @@ -343,7 +343,7 @@ def run(self): return c.result return internal2 # return internal - + def isstr(f): return isinstance(f, type('')) or isinstance(f, type(u'')) def ishtml(t): return type(t) is type(()) @@ -357,10 +357,10 @@ def unu(s): # I / freakin' hate / that unicode def getContent(entry, HTMLOK=0): """Select the best content from an entry, deHTMLizing if necessary. If raw HTML is best, an ('HTML', best) tuple is returned. """ - + # How this works: - # * We have a bunch of potential contents. - # * We go thru looking for our first choice. + # * We have a bunch of potential contents. + # * We go thru looking for our first choice. # (HTML or text, depending on HTMLOK) # * If that doesn't work, we go thru looking for our second choice. # * If that still doesn't work, we just take the first one. 
@@ -369,35 +369,35 @@ def getContent(entry, HTMLOK=0): # * Instead of just taking the first one # pick the one in the "best" language. # * HACK: hardcoded HTMLOK, should take a tuple of media types - + conts = entry.get('content', []) - + if entry.get('summary_detail', {}): conts += [entry.summary_detail] - + if conts: if HTMLOK: for c in conts: if contains(c.type, 'html'): return ('HTML', c.value) - + if not HTMLOK: # Only need to convert to text if HTML isn't OK for c in conts: if contains(c.type, 'html'): cleanerhtml = BeautifulSoup.BeautifulSoup(c.value) return html2text(unicode(cleanerhtml)) - + for c in conts: if c.type == 'text/plain': return c.value - - return conts[0].value - + + return conts[0].value + return "" def getID(entry): """Get best ID from an entry. NEEDS UNIT TESTS""" if TRUST_GUID: - if 'id' in entry and entry.id: + if 'id' in entry and entry.id: # Newer versions of feedparser could return a dictionary if type(entry.id) is DictType: return entry.id.values()[0] @@ -418,7 +418,7 @@ def getName(fullfeed, entry): feedinfo = fullfeed.feed if hasattr(fullfeed, "url") and fullfeed.url in OVERRIDE_FROM.keys(): return OVERRIDE_FROM[fullfeed.url] - + name = feedinfo.get('title', '') if 'name' in entry.get('author_detail', []): # normally {} but py2.1 @@ -434,7 +434,7 @@ def getName(fullfeed, entry): if feedinfo.author_detail.name: if name: name += ", " name += feedinfo.author_detail.name - + return name def validateEmail(email, planb): @@ -443,31 +443,31 @@ def validateEmail(email, planb): if (len(email_parts) != 2) or not email_parts[0] or not email_parts[1]: return planb return email - + def getEmail(r, entry): """Get the best email_address. If the best guess isn't well-formed (something@somthing.com), use DEFAULT_FROM instead. NEEDS UNIT TESTS""" - + feed = r.feed - + if FORCE_FROM: return DEFAULT_FROM - + if hasattr(r, "url") and r.url in OVERRIDE_EMAIL.keys(): return validateEmail(OVERRIDE_EMAIL[r.url], DEFAULT_FROM) - + if 'email' in entry.get('author_detail', []): return validateEmail(entry.author_detail.email, DEFAULT_FROM) - + if 'email' in feed.get('author_detail', []): return validateEmail(feed.author_detail.email, DEFAULT_FROM) - + if USE_PUBLISHER_EMAIL: if 'email' in feed.get('publisher_detail', []): return validateEmail(feed.publisher_detail.email, DEFAULT_FROM) - + if feed.get("errorreportsto", ''): return validateEmail(feed.errorreportsto, DEFAULT_FROM) - + if hasattr(r, "url") and r.url in DEFAULT_EMAIL.keys(): return DEFAULT_EMAIL[r.url] return DEFAULT_FROM @@ -485,7 +485,7 @@ def getTags(entry): tagline = ",".join(taglist) return tagline - + ### Simple Database of Feeds ### @@ -493,7 +493,7 @@ class Feed: def __init__(self, url, to): self.url, self.etag, self.modified, self.seen = url, None, None, {} self.active = True - self.to = to + self.to = to def load(lock=1): if not os.path.exists(feedfile): @@ -506,7 +506,7 @@ def load(lock=1): print "Feedfile could not be opened: %s" % e sys.exit(1) feeds = pickle.load(feedfileObject) - + if lock: locktype = 0 if unix: @@ -515,19 +515,19 @@ def load(lock=1): #HACK: to deal with lock caching feedfileObject = open(feedfile, 'r') feeds = pickle.load(feedfileObject) - if unix: + if unix: fcntl.flock(feedfileObject.fileno(), locktype) - if feeds: + if feeds: for feed in feeds[1:]: - if not hasattr(feed, 'active'): + if not hasattr(feed, 'active'): feed.active = True - + return feeds, feedfileObject def unlock(feeds, feedfileObject): - if not unix: + if not unix: pickle.dump(feeds, open(feedfile, 'w')) - else: + else: fd 
= open(feedfile+'.tmp', 'w') pickle.dump(feeds, fd) fd.flush() @@ -536,15 +536,15 @@ def unlock(feeds, feedfileObject): os.rename(feedfile+'.tmp', feedfile) fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_UN) -#@timelimit(FEED_TIMEOUT) +#@timelimit(FEED_TIMEOUT) def parse(url, etag, modified): if PROXY == '': return feedparser.parse(url, etag, modified) else: proxy = urllib2.ProxyHandler( {"http":PROXY} ) - return feedparser.parse(url, etag, modified, handlers = [proxy]) - - + return feedparser.parse(url, etag, modified, handlers = [proxy]) + + ### Program Functions ### def add(*args): @@ -552,7 +552,7 @@ def add(*args): urls, to = [args[0]], args[1] else: urls, to = args, None - + feeds, feedfileObject = load() if (feeds and not isstr(feeds[0]) and to is None) or (not len(feeds) and to is None): print "No email address has been defined. Please run 'r2e email emailaddress' or" @@ -568,17 +568,17 @@ def run(num=None): # We store the default to address as the first item in the feeds list. # Here we take it out and save it for later. default_to = "" - if feeds and isstr(feeds[0]): default_to = feeds[0]; ifeeds = feeds[1:] + if feeds and isstr(feeds[0]): default_to = feeds[0]; ifeeds = feeds[1:] else: ifeeds = feeds - + if num: ifeeds = [feeds[num]] feednum = 0 - + for f in ifeeds: - try: + try: feednum += 1 if not f.active: continue - + if VERBOSE: print >>warn, 'I: Processing [%d] "%s"' % (feednum, f.url) r = {} try: @@ -586,7 +586,7 @@ def run(num=None): except TimeoutError: print >>warn, 'W: feed [%d] "%s" timed out' % (feednum, f.url) continue - + # Handle various status conditions, as required if 'status' in r: if r.status == 301: f.url = r['url'] @@ -594,15 +594,15 @@ def run(num=None): print >>warn, "W: feed gone; deleting", f.url feeds.remove(f) continue - + http_status = r.get('status', 200) if VERBOSE > 1: print >>warn, "I: http status", http_status http_headers = r.get('headers', { - 'content-type': 'application/rss+xml', + 'content-type': 'application/rss+xml', 'content-length':'1'}) exc_type = r.get("bozo_exception", Exception()).__class__ if http_status != 304 and not r.entries and not r.get('version', ''): - if http_status not in [200, 302]: + if http_status not in [200, 302]: print >>warn, "W: error %d [%d] %s" % (http_status, feednum, f.url) elif contains(http_headers.get('content-type', 'rss'), 'html'): @@ -613,13 +613,13 @@ def run(num=None): elif hasattr(socket, 'timeout') and exc_type == socket.timeout: print >>warn, "W: timed out on [%d] %s" % (feednum, f.url) - + elif exc_type == IOError: print >>warn, 'W: "%s" [%d] %s' % (r.bozo_exception, feednum, f.url) - + elif hasattr(feedparser, 'zlib') and exc_type == feedparser.zlib.error: print >>warn, "W: broken compression [%d] %s" % (feednum, f.url) - + elif exc_type in socket_errors: exc_reason = r.bozo_exception.args[1] print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url) @@ -630,13 +630,13 @@ def run(num=None): else: exc_reason = r.bozo_exception.reason print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url) - + elif exc_type == AttributeError: print >>warn, "W: %s [%d] %s" % (r.bozo_exception, feednum, f.url) - + elif exc_type == KeyboardInterrupt: raise r.bozo_exception - + elif r.bozo: print >>warn, 'E: error in [%d] "%s" feed (%s)' % (feednum, f.url, r.get("bozo_exception", "can't process")) @@ -652,25 +652,25 @@ def run(num=None): print >>warn, "Python", sys.version print >>warn, "=== END HERE ===" continue - + r.entries.reverse() - + for entry in r.entries: id = getID(entry) - + # If TRUST_GUID isn't 
set, we get back hashes of the content. # Instead of letting these run wild, we put them in context # by associating them with the actual ID (if it exists). - + frameid = entry.get('id') if not(frameid): frameid = id if type(frameid) is DictType: frameid = frameid.values()[0] - + # If this item's ID is in our database # then it's already been sent # and we don't need to do anything more. - + if frameid in f.seen: if f.seen[frameid] == id: continue @@ -678,7 +678,7 @@ def run(num=None): print "No default email address defined. Please run 'r2e email emailaddress'" print "Ignoring feed %s" % f.url break - + if 'title_detail' in entry and entry.title_detail: title = entry.title_detail.value if contains(entry.title_detail.type, 'html'): @@ -687,25 +687,25 @@ def run(num=None): title = getContent(entry)[:70] title = title.replace("\n", " ").strip() - + datetime = time.gmtime() if DATE_HEADER: for datetype in DATE_HEADER_ORDER: kind = datetype+"_parsed" if kind in entry and entry[kind]: datetime = entry[kind] - + link = entry.get('link', "") - + from_addr = getEmail(r, entry) - + name = h2t.unescape(getName(r, entry)) fromhdr = formataddr((name, from_addr,)) tohdr = (f.to or default_to) subjecthdr = title datehdr = time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime) useragenthdr = "rss2email" - + # Add post tags, if available tagline = getTags(entry) @@ -716,14 +716,14 @@ def run(num=None): if pos > 0: extraheaders[hdr[:pos]] = hdr[pos+1:].strip() else: - print >>warn, "W: malformed BONUS HEADER", BONUS_HEADER - + print >>warn, "W: malformed BONUS HEADER", BONUS_HEADER + entrycontent = getContent(entry, HTMLOK=HTML_MAIL) contenttype = 'plain' content = '' if USE_CSS_STYLING and HTML_MAIL: contenttype = 'html' - content = "\n" + content = "\n" content += '\n' content += '\n' content += '
    \n' @@ -734,7 +734,7 @@ def run(num=None): body = entrycontent[1].strip() else: body = entrycontent.strip() - if body != '': + if body != '': content += '
    \n' + body + '
    \n' content += '\n
    \n' content += "\n\n" - else: + else: if ishtml(entrycontent): contenttype = 'html' - content = "\n" - content = ("\n\n" + + content = "\n" + content = ("\n\n" + '

    '+subjecthdr+'

    \n\n' + entrycontent[1].strip() + # drop type tag (HACK: bad abstraction) '

    URL: '+link+'

    ' ) - + if hasattr(entry,'enclosures'): for enclosure in entry.enclosures: if enclosure.url != "": @@ -771,7 +771,7 @@ def run(num=None): for extralink in entry.links: if ('rel' in extralink) and extralink['rel'] == u'via': content += 'Via: '+extralink['title']+'
    \n' - + content += ("\n") else: content = entrycontent.strip() + "\n\nURL: "+link @@ -785,9 +785,9 @@ def run(num=None): content += 'Via: '+extralink['title']+'\n' smtpserver = send(fromhdr, tohdr, subjecthdr, content, contenttype, extraheaders, smtpserver) - + f.seen[frameid] = id - + f.etag, f.modified = r.get('etag', None), r.get('modified', None) except (KeyboardInterrupt, SystemExit): raise @@ -804,7 +804,7 @@ def run(num=None): print >>warn, "=== END HERE ===" continue - finally: + finally: unlock(feeds, feedfileObject) if smtpserver: smtpserver.quit() @@ -812,7 +812,7 @@ def run(num=None): def list(): feeds, feedfileObject = load(lock=0) default_to = "" - + if feeds and isstr(feeds[0]): default_to = feeds[0]; ifeeds = feeds[1:]; i=1 print "default email:", default_to @@ -827,7 +827,7 @@ def list(): def opmlexport(): import xml.sax.saxutils feeds, feedfileObject = load(lock=0) - + if feeds: print '\n\n\nrss2email OPML export\n\n' for f in feeds[1:]: @@ -854,15 +854,15 @@ def opmlimport(importfile): sys.exit(1) feeds, feedfileObject = load(lock=1) - + import xml.sax.saxutils - + for f in newfeeds: if f.hasAttribute('xmlUrl'): feedurl = f.getAttribute('xmlUrl') print 'Adding %s' % xml.sax.saxutils.unescape(feedurl) feeds.append(Feed(feedurl, None)) - + unlock(feeds, feedfileObject) def delete(n): @@ -877,7 +877,7 @@ def delete(n): if n != len(feeds): print >>warn, "W: feed IDs have changed, list before deleting again" unlock(feeds, feedfileObject) - + def toggleactive(n, active): feeds, feedfileObject = load() if (n == 0) and (feeds and isstr(feeds[0])): @@ -889,7 +889,7 @@ def toggleactive(n, active): print >>warn, "%s feed %s" % (action, feeds[n].url) feeds[n].active = active unlock(feeds, feedfileObject) - + def reset(): feeds, feedfileObject = load() if feeds and isstr(feeds[0]): @@ -900,9 +900,9 @@ def reset(): f.seen = {} f.etag = None f.modified = None - + unlock(feeds, feedfileObject) - + def email(addr): feeds, feedfileObject = load() if feeds and isstr(feeds[0]): feeds[0] = addr @@ -914,8 +914,8 @@ def email(addr): try: if len(args) < 3: raise InputError, "insufficient args" feedfile, action, args = args[1], args[2], args[3:] - - if action == "run": + + if action == "run": if args and args[0] == "--no-send": def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None): if VERBOSE: print 'Not sending:', unu(subject) @@ -931,7 +931,7 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps elif action == "add": add(*args) - elif action == "new": + elif action == "new": if len(args) == 1: d = [args[0]] else: d = [] pickle.dump(d, open(feedfile, 'w')) @@ -961,14 +961,14 @@ def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtps elif action == "opmlexport": opmlexport() - elif action == "opmlimport": + elif action == "opmlimport": if not args: raise InputError, "OPML import '%s' requires a filename argument" % action opmlimport(args[0]) else: raise InputError, "Invalid action" - + except InputError, e: print "E:", e print diff --git a/test_rss2email.py b/test_rss2email.py index 8ab532f..437db85 100644 --- a/test_rss2email.py +++ b/test_rss2email.py @@ -50,7 +50,7 @@ def test_no_friendly_name(self): name = getName(0, 0) rss2email.NO_FRIENDLY_NAME = 0 self.assertEqual(name, '') - + def test_override_from(self): # have to fake url attribute because it is only set on downloaded feeds urlToOverride = 'http://example.com/feed/' @@ -96,7 +96,7 @@ def test_multiple_tags(self): tagline = getTags(entry) 
self.assertEqual(tagline, "tag1,tag2")
-
+
if __name__ == '__main__':
    unittest.main()

From cd230d4be6513b1fc6e276079b12cde3a5685d7c Mon Sep 17 00:00:00 2001
From: Arun Persaud
Date: Mon, 15 Oct 2012 10:38:42 -0700
Subject: [PATCH 2/2] added comment about customization of CHARSET_LIST (should be done in config.py)
---
 rss2email.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rss2email.py b/rss2email.py
index 9d08eaa..73350cb 100644
--- a/rss2email.py
+++ b/rss2email.py
@@ -109,6 +109,7 @@
# To most correctly encode emails with international characters, we iterate through the list below and use the first character set that works
# Eventually (and theoretically) ISO-8859-1 and UTF-8 are our catch-all failsafes
+# CHARSET_LIST gets customized in config.py, so any changes there will override this setting
CHARSET_LIST='US-ASCII', 'BIG5', 'ISO-2022-JP', 'ISO-8859-1', 'UTF-8'

from email.MIMEText import MIMEText
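
For reference, the fallback that this new comment documents works as follows: send() walks CHARSET_LIST in order and keeps the first charset that can encode the whole message body, catching UnicodeError and falling through to the next entry. Below is a minimal, runnable sketch of that behaviour; pick_body_charset() is an illustrative name, not a function in rss2email, and only the loop logic mirrors the code in send():

    # Sketch of the charset fallback performed in send(). The helper name
    # pick_body_charset() is illustrative only; it is not rss2email API.
    CHARSET_LIST = 'US-ASCII', 'BIG5', 'ISO-2022-JP', 'ISO-8859-1', 'UTF-8'

    def pick_body_charset(body, charsets=CHARSET_LIST):
        """Return the first charset able to represent every character in body."""
        for charset in charsets:
            try:
                body.encode(charset)
            except UnicodeError:
                continue  # charset cannot encode the body; try the next one
            return charset
        return 'UTF-8'    # the "catch-all failsafe" the comment above mentions

    print(pick_body_charset(u'plain ascii'))            # US-ASCII
    print(pick_body_charset(u'caf\xe9'))                # ISO-8859-1
    print(pick_body_charset(u'caf\xe9 costs \u20ac2'))  # UTF-8 (euro sign)

Because the tuple is scanned in order, redefining CHARSET_LIST in config.py (as the comment advises) changes which encoding wins; keeping UTF-8 at the end preserves the catch-all.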