<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Rasila Garage &#187; email</title>
	<atom:link href="http://rasilagarage.com/tag/email/feed/" rel="self" type="application/rss+xml" />
	<link>http://rasilagarage.com</link>
	<description>Tuomas Rasila's blog about software and entrepreneurship</description>
	<lastBuildDate>Sun, 07 Mar 2010 09:11:27 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.0</generator>
		<item>
		<title>Extracting email addresses from any text file with Python</title>
		<link>http://rasilagarage.com/2009/06/extracting-email-addresses-from-any-text-file-with-python/</link>
		<comments>http://rasilagarage.com/2009/06/extracting-email-addresses-from-any-text-file-with-python/#comments</comments>
		<pubDate>Sat, 13 Jun 2009 09:05:48 +0000</pubDate>
		<dc:creator>Tuomas Rasila</dc:creator>
				<category><![CDATA[Programming]]></category>
		<category><![CDATA[email]]></category>
		<category><![CDATA[python]]></category>

		<guid isPermaLink="false">http://rasilagarage.com/?p=215</guid>
		<description><![CDATA[OK, this might sound like we are in the spamming business now. Well, we are not. The point is that an email address is typically the only per-person unique key in CRM data. These few lines of Python will extract email addresses from any text file, e.g. an HTML file. This script will also make [...]]]></description>
			<content:encoded><![CDATA[<p><img class="alignright size-medium wp-image-217" title="picture-11" src="http://rasilagarage.com/wp-content/uploads/2009/06/picture-11-300x190.png" alt="picture-11" width="300" height="190" />OK, this might sound like we are in the spamming business now. Well, we are not. The point is that an email address is typically the only per-person unique key in CRM data. These few lines of Python will extract email addresses from any text file, e.g. an HTML file. This script will also make the list unique, so if the same email address appears many times in the original data, it will appear only once in the output. Enjoy:</p>
<pre class="brush:python">
#!/usr/bin/env python
# coding: utf-8

import os
import re
import sys

def grab_email(file):
    """Print every unique email address found in the given text file.

    Args:
        file: Path of the file to scan. If the path is not an existing
            regular file, nothing is printed.

    Each distinct address is printed once, one per line, in sorted order.
    Matching ignores letter case, but addresses that differ only in
    letter case are still reported separately.
    """
    # The top-level domain may be 2 to 63 letters (63 is the DNS label
    # limit), so long TLDs such as .museum or .travel are accepted too.
    email_pattern = re.compile(r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,63}\b',
                               re.IGNORECASE)
    found = set()
    if os.path.isfile(file):
        # The with-block closes the file even if reading fails part-way.
        with open(file, 'r') as text_file:
            for line in text_file:
                found.update(email_pattern.findall(line))
    # Sorting makes the output deterministic instead of set-ordered.
    for email_address in sorted(found):
        # Parenthesised form works on both Python 2 and Python 3.
        print(email_address)

if __name__ == '__main__':
    # The first command-line argument is the path of the file to scan;
    # without it, show a usage hint instead of an IndexError traceback.
    if not sys.argv[1:]:
        sys.exit('Usage: %s FILE' % sys.argv[0])
    grab_email(sys.argv[1])
</pre>
]]></content:encoded>
			<wfw:commentRss>http://rasilagarage.com/2009/06/extracting-email-addresses-from-any-text-file-with-python/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
		</item>
	</channel>
</rss>
