Commit 9ec89d70 by Frank Wiles

Finished initial creation of taggit.contrib.suggest

  -- Added TAGGIT_SUGGEST_MAX_LENGTH setting option
parent bfeafd17
taggit.contrib.suggest
======================
This add-on module allows you to easily associate keywords and regular
expressions with a Tag object. This is useful to help keep your database from
getting filled up with several similar tags that really represent the same thing.
For example, if your site is a humor site you might want to collapse all of
#fun, #funny, #funnies, #hilarious, #rofl, and #lol into one tag #funny. The
suggest_tags() function in taggit.contrib.suggest.utils will give you a list
of tags that seem appropriate for the text content given to it.
It will also do some basic stemming of the keywords for you, which requires the
Python NLTK.
In a later version I hope to add a simple way to help determine keywords for you
automatically, by learning from your past tags and content.
from django.contrib import admin
from taggit.models import Tag
from taggit.admin import TaggedItemInline
from taggit.contrib.suggest.models import TagKeyword, TagRegExp
class TagKeywordInline(admin.StackedInline):
    """Stacked admin inline for TagKeyword rows."""

    model = TagKeyword
class TagRegExpInline(admin.StackedInline):
    """Stacked admin inline for TagRegExp rows."""

    model = TagRegExp
class TagSuggestAdmin(admin.ModelAdmin):
    """ModelAdmin showing tagged items, keywords and regexps as inlines."""

    inlines = [TaggedItemInline, TagKeywordInline, TagRegExpInline]
# Swap the admin used for Tag: drop the current registration first, then
# register TagSuggestAdmin in its place.
admin.site.unregister(Tag)
admin.site.register(Tag, TagSuggestAdmin)
import re
from django.db import models
from django.core.exceptions import ValidationError
from taggit.models import Tag
class TagKeyword(models.Model):
    """A plain keyword attached to a Tag."""

    # Reverse accessor on Tag is ``keywords``.
    tag = models.ForeignKey(Tag, related_name='keywords')
    keyword = models.CharField(max_length=30)

    def __unicode__(self):
        """Describe the keyword together with the name of its tag."""
        description = "Keyword '%s' for Tag '%s'"
        return description % (self.keyword, self.tag.name)
def validate_regexp(value):
    """Validate that ``value`` compiles as a regular expression.

    Args:
        value: the pattern text to check.

    Raises:
        ValidationError: if ``re.compile`` cannot compile ``value``.
    """
    try:
        re.compile(value)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit are not turned into validation errors.
        raise ValidationError('Please enter a valid regular expression')
class TagRegExp(models.Model):
    """A named regular expression attached to a Tag."""

    # Reverse accessor on Tag is ``regexps``.
    tag = models.ForeignKey(Tag, related_name='regexps')
    name = models.CharField(max_length=30)
    # The pattern text is checked by validate_regexp during validation.
    regexp = models.CharField(max_length=250, validators=[validate_regexp])
    case_insensitive = models.BooleanField(default=False)

    def __unicode__(self):
        """Use the human-readable name as the text representation."""
        return self.name

    def save(self, *args, **kwargs):
        """Run full model validation, then save as usual."""
        # full_clean() runs the field validators, so an invalid pattern
        # raises before anything is written.
        self.full_clean()
        super(TagRegExp, self).save(*args, **kwargs)
from taggit.contrib.suggest.tests.tests import AddKeywordCase, AddRegexpCase, SuggestCase
# Database backend: SQLite.
DATABASE_ENGINE = 'sqlite3'

# Apps enabled under these settings: content types, taggit itself and the
# suggest add-on.
INSTALLED_APPS = [
    'django.contrib.contenttypes',
    'taggit',
    'taggit.contrib.suggest',
]
from django.test import TestCase
from django.core.exceptions import ValidationError
from taggit.models import Tag
from taggit.contrib.suggest.models import TagKeyword, TagRegExp
from taggit.contrib.suggest.utils import suggest_tags
class AddKeywordCase(TestCase):
    """Creating a TagKeyword for a Tag."""

    def test_adding_keyword(self):
        """A created keyword is truthy and points back at its tag."""
        tag = Tag.objects.create(name='ku')
        keyword = TagKeyword.objects.create(
            keyword='kansas university',
            tag=tag,
        )

        self.assertTrue(keyword)
        self.assertTrue(keyword.tag == tag)
class AddRegexpCase(TestCase):
    """Creating a TagRegExp for a Tag."""

    def test_adding_regexp(self):
        """A created regexp is truthy and points back at its tag."""
        new_tag = Tag.objects.create(name='ku')
        new_regexp = TagRegExp.objects.create(
            tag=new_tag,
            name='Find University of Kansas',
            # Raw string: ``\s`` is not a valid string escape, so the
            # non-raw spelling triggers invalid-escape warnings.
            regexp=r'University\s+of\s+Kansas',
        )

        self.assertTrue(new_regexp)
        self.assertTrue(new_regexp.tag == new_tag)
class SuggestCase(TestCase):
    """Exercise suggest_tags() with keywords and regular expressions."""

    def test_simple_suggest(self):
        """A keyword found in the content suggests its tag."""
        ku_tag = Tag.objects.create(name='ku')
        TagKeyword.objects.create(
            tag=ku_tag,
            keyword='kansas university',
        )

        suggested_tags = suggest_tags(
            content='I used to be a student at kansas university')

        self.assertTrue(ku_tag in suggested_tags)

    def test_regexp_suggest(self):
        """A regular expression matching the content suggests its tag."""
        ku_tag = Tag.objects.create(name='ku')
        TagRegExp.objects.create(
            tag=ku_tag,
            name='Find University of Kansas',
            # Raw string keeps ``\s`` intact without invalid-escape warnings.
            regexp=r'University\s+of\s+Kansas',
        )

        suggested_tags = suggest_tags(
            content='I was once a student at the University of Kansas')

        self.assertTrue(ku_tag in suggested_tags)

    def test_bad_regexp(self):
        """A malformed regexp is rejected on save; keywords still match."""
        ku_tag = Tag.objects.create(name='ku')
        TagKeyword.objects.create(
            tag=ku_tag,
            keyword='kansas university',
        )
        # Unbalanced parenthesis makes this pattern invalid.
        new_regexp = TagRegExp(
            tag=ku_tag,
            name='Find University of Kansas',
            regexp=r'University\s+of(\s+Kansas',
        )

        self.assertRaises(ValidationError, new_regexp.save)

        suggested_tags = suggest_tags(content='I was once a student at the University of Kansas. Also known as kansas university by the way.')

        self.assertTrue(ku_tag in suggested_tags)
import re
from taggit.models import Tag
from taggit.contrib.suggest.models import TagKeyword, TagRegExp
from django.conf import settings
# NLTK is optional: HAS_NLTK records whether a Porter stemmer could be
# imported.
HAS_NLTK = True
try:
    # Module path used by current NLTK releases.
    from nltk.stem.porter import PorterStemmer
except ImportError:
    try:
        # Legacy module path, kept as a fallback for old NLTK installs.
        from nltk.stemmer.porter import PorterStemmer
    except ImportError:
        HAS_NLTK = False
def _suggest_keywords(content=None):
    """Return the ids (as strings) of tags with a keyword found in content.

    Matching is a plain substring test of each stored keyword against
    ``content``.
    """
    keyword_rows = TagKeyword.objects.values_list('keyword', 'tag')
    return set(
        str(tag_id)
        for keyword, tag_id in keyword_rows
        if keyword in content
    )
def _suggest_regexps(content=None):
    """Return the ids (as strings) of tags with a regexp matching content.

    Every stored TagRegExp pattern is compiled, with ``re.IGNORECASE`` when
    the row is flagged case-insensitive, and searched for in ``content``.
    Patterns that fail to compile are skipped.
    """
    suggested_regexps = set()
    regexp_rows = TagRegExp.objects.values_list(
        'regexp',
        'tag',
        'case_insensitive')

    for pattern, tag_id, case_insensitive in regexp_rows:
        # The flag is ``re.IGNORECASE``; the previous ``re.IGNORE_CASE``
        # raised AttributeError, which the bare ``except`` swallowed, so
        # case-insensitive patterns were never applied.
        flags = re.IGNORECASE if case_insensitive else 0
        try:
            compiled = re.compile(pattern, flags)
        except Exception:
            # Best effort: a stored pattern that cannot be compiled must
            # not prevent the remaining patterns from being tried.
            continue
        if compiled.search(content):
            suggested_regexps.add(str(tag_id))

    return suggested_regexps
def suggest_tags(content=None):
    """Suggest tags based on text content.

    Args:
        content: text to scan. When the ``TAGGIT_SUGGEST_MAX_LENGTH``
            setting is set, only that many leading characters are scanned.

    Returns:
        ``None`` when ``content`` is empty; otherwise a Tag queryset of the
        tags whose keywords or regular expressions matched.
    """
    if not content:
        return None

    max_length = getattr(settings, 'TAGGIT_SUGGEST_MAX_LENGTH', None)
    if max_length:
        content = content[:max_length]

    suggested_tag_ids = _suggest_keywords(content) | _suggest_regexps(content)

    # Use the ORM lookup instead of a hand-built ``id IN (...)`` string: with
    # no matches the old SQL became ``id IN ()``, which many database
    # backends reject.
    return Tag.objects.filter(id__in=list(suggested_tag_ids))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment