Commit cf5df6c9 by Frank Wiles

Added stemming support.

parent d72afae0
...@@ -4,17 +4,28 @@ taggit.contrib.suggest ...@@ -4,17 +4,28 @@ taggit.contrib.suggest
This add-on module allows you to easily associate keywords and regular This add-on module allows you to easily associate keywords and regular
expressions with a Tag object. This is useful to help keep your database expressions with a Tag object. This is useful to help keep your database
from getting filled up with several similar tags that really represent the same thing. from getting filled up with several similar tags that really represent the same
thing.
For example, if your site is a humor site you might want to collapse all of For example, if your site is a humor site you might want to collapse all of
#fun, #funny, #funnies, #hilarious, #rofl, and #lol into one tag #funny. The #fun, #funny, #funnies, #hilarious, #rofl, and #lol into one tag #funny. The
suggest_tags() function in taggit.contrib.suggest.utils will give you a list suggest_tags() function in taggit.contrib.suggest.utils will give you a list
of tags that seem appropriate for the text content given to it. of tags that seem appropriate for the text content given to it.
Usage
=====
Put 'taggit.contrib.suggest' into INSTALLED_APPS and run a syncdb to create
the necessary models. This will add Keywords and Regular Expression inlines
to the default django-taggit admin. Once you've populated those based on your
site you can do a simple:
from taggit.contrib.suggest.utils import suggest_tags
tags = suggest_tags(content='Some textual content...')
TODO TODO
==== ====
* Basic stemming of the keywords for you! Which will require the Python NLTK.
* In a later version I hope to add a simple way to help determine keywords for you * In a later version I hope to add a simple way to help determine keywords for you
automatically, by learning from your past tags and content. automatically, by learning from your past tags and content.
...@@ -4,15 +4,29 @@ from django.db import models ...@@ -4,15 +4,29 @@ from django.db import models
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from taggit.models import Tag from taggit.models import Tag
HAS_PYSTEMMER = True
try:
import Stemmer
except ImportError:
HAS_PYSTEMMER = False
class TagKeyword(models.Model): class TagKeyword(models.Model):
""" Model to associate simple keywords to a Tag """ """ Model to associate simple keywords to a Tag """
tag = models.ForeignKey(Tag, related_name='keywords') tag = models.ForeignKey(Tag, related_name='keywords')
keyword = models.CharField(max_length=30) keyword = models.CharField(max_length=30)
stem = models.CharField(max_length=30)
def __unicode__(self): def __unicode__(self):
return "Keyword '%s' for Tag '%s'" % (self.keyword, self.tag.name) return "Keyword '%s' for Tag '%s'" % (self.keyword, self.tag.name)
def save(self, *args, **kwargs):
""" Stem the keyword on save if they have PyStemmer """
language = kwargs.pop('stemmer-language', 'english')
if not self.id and not self.stem and HAS_PYSTEMMER:
stemmer = Stemmer.Stemmer(language)
self.stem = stemmer.stemWord(self.keyword)
super(TagKeyword,self).save(*args,**kwargs)
def validate_regexp(value): def validate_regexp(value):
""" Make sure we have a valid regular expression """ """ Make sure we have a valid regular expression """
try: try:
......
...@@ -4,12 +4,6 @@ from taggit.contrib.suggest.models import TagKeyword, TagRegExp ...@@ -4,12 +4,6 @@ from taggit.contrib.suggest.models import TagKeyword, TagRegExp
from django.conf import settings from django.conf import settings
HAS_NLTK = True
try:
from nltk.stemmer.porter import PorterStemmer
except ImportError:
HAS_NLTK = False
def _suggest_keywords(content=None): def _suggest_keywords(content=None):
""" Suggest by keywords """ """ Suggest by keywords """
suggested_keywords = set() suggested_keywords = set()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment