<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Joseph Wilk &#187; Datamining</title>
	<atom:link href="http://blog.josephwilk.net/tag/datamining/feed" rel="self" type="application/rss+xml" />
	<link>http://blog.josephwilk.net</link>
	<description>on AI, The Web, Usability, Testing &#38; Software process</description>
	<lastBuildDate>Sat, 09 Jan 2010 18:27:09 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.9.1</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>Latent Semantic Analysis in Python</title>
		<link>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html</link>
		<comments>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html#comments</comments>
		<pubDate>Wed, 19 Dec 2007 11:14:42 +0000</pubDate>
		<dc:creator>Joseph Wilk</dc:creator>
				<category><![CDATA[Information retrieval]]></category>
		<category><![CDATA[Projects]]></category>
		<category><![CDATA[Datamining]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Search]]></category>

		<guid isPermaLink="false">http://www.joesniff.co.uk/projects/latent-semantic-analysis-in-python.html</guid>
		<description><![CDATA[Latent Semantic Analysis (LSA) is a mathematical method that tries to bring out latent relationships within a collection of documents. Rather than looking at each document isolated from the others it looks at all the documents as a whole and the terms within them to identify relationships.
An example of LSA:
Using a search engine search for [...]]]></description>
		<wfw:commentRss>http://blog.josephwilk.net/projects/latent-semantic-analysis-in-python.html/feed</wfw:commentRss>
		<slash:comments>11</slash:comments>
		</item>
		<item>
		<title>Building a Vector Space Search Engine in Python</title>
		<link>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html</link>
		<comments>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html#comments</comments>
		<pubDate>Tue, 27 Nov 2007 08:08:49 +0000</pubDate>
		<dc:creator>Joseph Wilk</dc:creator>
				<category><![CDATA[Critique]]></category>
		<category><![CDATA[Information retrieval]]></category>
		<category><![CDATA[Projects]]></category>
		<category><![CDATA[Datamining]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[Search]]></category>
		<category><![CDATA[Vectors]]></category>

		<guid isPermaLink="false">http://www.joesniff.co.uk/wordpress/projects/building-a-vector-space-search-engine-in-python.html</guid>
		<description><![CDATA[A vector space search involves converting documents into vectors. Each dimension within the vectors represents a term. If a document contains that term then the value within the vector is greater than zero.
Here is an implementation of Vector space searching using Python (2.4+).
1 Stemming &#38; Stop words
Fetch all terms within documents and clean &#8211; use [...]]]></description>
		<wfw:commentRss>http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html/feed</wfw:commentRss>
		<slash:comments>9</slash:comments>
		</item>
		<item>
		<title>Automatic Tag Generation</title>
		<link>http://blog.josephwilk.net/projects/automatic-tag-generation.html</link>
		<comments>http://blog.josephwilk.net/projects/automatic-tag-generation.html#comments</comments>
		<pubDate>Mon, 22 Oct 2007 13:38:58 +0000</pubDate>
		<dc:creator>Joseph Wilk</dc:creator>
				<category><![CDATA[Information retrieval]]></category>
		<category><![CDATA[Projects]]></category>
		<category><![CDATA[AI]]></category>
		<category><![CDATA[categorization]]></category>
		<category><![CDATA[Datamining]]></category>
		<category><![CDATA[Statistics]]></category>

		<guid isPermaLink="false">http://www.joesniff.co.uk/wordpress/projects/whats-in-a-name.html</guid>
		<description><![CDATA[This project looked at dynamically generating suggestion tags for content. To simplify the task some constraints were introduced.

The content which will be tagged is news articles with HTML markup.
Only English content.

I used the following HTML page to experiment on with suggestion tags: http://news.bbc.co.uk/1/hi/entertainment/6624223.stm

To help evaluate the tagging methods I asked a sample of people to [...]]]></description>
		<wfw:commentRss>http://blog.josephwilk.net/projects/automatic-tag-generation.html/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
