<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<!-- Channel-level metadata for the "Datamining" tag feed. -->
	<title>Joseph Wilk » Datamining</title>
	<!-- rel="self": the address at which this feed document itself is published. -->
	<atom:link rel="self" type="application/rss+xml" href="http://blog.josephwilk.net/tag/datamining/feed"/>
	<link>http://blog.josephwilk.net</link>
	<description>on AI, The Web, Usability, Testing &amp; Software process</description>
	<lastBuildDate>Tue, 26 Jul 2011 17:14:14 +0000</lastBuildDate>
	<language>en</language>
	<!-- Syndication hints for aggregators: period "hourly", frequency 1 within that period. -->
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.1.2</generator>
		<!-- Post: Mining Cucumber Features -->
		<item>
			<title>Mining Cucumber Features</title>
			<link>http://blog.josephwilk.net/cucumber/mining-cucumber-features.html</link>
			<comments>http://blog.josephwilk.net/cucumber/mining-cucumber-features.html#comments</comments>
			<pubDate>Tue, 26 Jul 2011 17:00:23 +0000</pubDate>
			<dc:creator>Joseph Wilk</dc:creator>
			<!-- Categories hold plain words only, so no CDATA wrapper is needed. -->
			<category>Cucumber</category>
			<category>Datamining</category>
			<category>metrics</category>
			<category>Testing</category>
			<!-- isPermaLink="false": the guid is an identifier string, not a URL to be followed. -->
			<guid isPermaLink="false">http://blog.josephwilk.net/?p=2261</guid>
			<!-- Truncated excerpt; kept in CDATA so its entity-encoded text (e.g. &#8217;) reaches consumers verbatim. -->
			<description><![CDATA[Failure Rates vs Change Rates I&#8217;ve been spending a lot of  time recently data mining our test suite at Songkick.com. I&#8217;ve been using our own internal version of www.limited-red.com which records all our test fails for every build on our continuous integration server. With the Cucumber features I decided to plot the number of times a feature file [...]]]></description>
			<wfw:commentRss>http://blog.josephwilk.net/cucumber/mining-cucumber-features.html/feed</wfw:commentRss>
			<slash:comments>3</slash:comments>
		</item>
		<!-- Post: Latent Semantic Analysis in Python -->
		<item>
			<title>Latent Semantic Analysis in Python</title>
			<link>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html</link>
			<comments>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html#comments</comments>
			<pubDate>Wed, 19 Dec 2007 11:14:42 +0000</pubDate>
			<dc:creator>Joseph Wilk</dc:creator>
			<!-- Categories hold plain words only, so no CDATA wrapper is needed. -->
			<category>Information retrieval</category>
			<category>Projects</category>
			<category>Datamining</category>
			<category>Python</category>
			<category>Search</category>
			<!-- isPermaLink="false": the guid is an identifier string, not a URL to be followed. -->
			<guid isPermaLink="false">http://www.joesniff.co.uk/projects/latent-semantic-analysis-in-python.html</guid>
			<!-- Truncated excerpt of the post (ends in "[...]"); content reproduced verbatim. -->
			<description><![CDATA[Latent Semantic Analysis (LSA) is a mathematical method that tries to bring out latent relationships within a collection of documents. Rather than looking at each document isolated from the others it looks at all the documents as a whole and the terms within them to identify relationships. An example of LSA: Using a search engine [...]]]></description>
			<wfw:commentRss>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html/feed</wfw:commentRss>
			<slash:comments>11</slash:comments>
		</item>
		<!-- Post: Building a Vector Space Search Engine in Python -->
		<item>
			<title>Building a Vector Space Search Engine in Python</title>
			<link>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html</link>
			<comments>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html#comments</comments>
			<pubDate>Tue, 27 Nov 2007 08:08:49 +0000</pubDate>
			<dc:creator>Joseph Wilk</dc:creator>
			<!-- Categories hold plain words only, so no CDATA wrapper is needed. -->
			<category>Critique</category>
			<category>Information retrieval</category>
			<category>Projects</category>
			<category>Datamining</category>
			<category>Python</category>
			<category>Search</category>
			<category>Vectors</category>
			<!-- isPermaLink="false": the guid is an identifier string, not a URL to be followed. -->
			<guid isPermaLink="false">http://www.joesniff.co.uk/wordpress/projects/building-a-vector-space-search-engine-in-python.html</guid>
			<!-- Truncated excerpt; kept in CDATA so its entity-encoded text (e.g. &#38;) reaches consumers verbatim. -->
			<description><![CDATA[A vector space search involves converting documents into vectors. Each dimension within the vectors represents a term. If a document contains that term then the value within the vector is greater than zero. Here is an implementation of Vector space searching using python (2.4+). 1 Stemming &#38; Stop words Fetch all terms within documents and [...]]]></description>
			<wfw:commentRss>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html/feed</wfw:commentRss>
			<slash:comments>9</slash:comments>
		</item>
		<!-- Post: Automatic Tag Generation -->
		<item>
			<title>Automatic Tag Generation</title>
			<link>http://blog.josephwilk.net/projects/automatic-tag-generation.html</link>
			<comments>http://blog.josephwilk.net/projects/automatic-tag-generation.html#comments</comments>
			<pubDate>Mon, 22 Oct 2007 13:38:58 +0000</pubDate>
			<dc:creator>Joseph Wilk</dc:creator>
			<!-- Categories hold plain words only, so no CDATA wrapper is needed. -->
			<category>Information retrieval</category>
			<category>Projects</category>
			<category>AI</category>
			<category>categorization</category>
			<category>Datamining</category>
			<category>Statistics</category>
			<!-- isPermaLink="false": the guid is an identifier string, not a URL to be followed; its path differs from the post link above. -->
			<guid isPermaLink="false">http://www.joesniff.co.uk/wordpress/projects/whats-in-a-name.html</guid>
			<!-- Truncated excerpt of the post (ends in "[...]"); content reproduced verbatim. -->
			<description><![CDATA[This project looked at dynamically generating suggestion tags for content. To simplify the task some constraints where introduced. The content which will be tagged is news articles with HTML markup. Only English content. I used the following HTML page to experiment on with suggestion tags: http://news.bbc.co.uk/1/hi/entertainment/6624223.stm To help evaluate the tagging methods I asked a [...]]]></description>
			<wfw:commentRss>http://blog.josephwilk.net/projects/automatic-tag-generation.html/feed</wfw:commentRss>
			<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>

