Commit e07091a4 by Carl Meyer

Merge branch 'smart_tagstring_parse'

parents 54ce3c1b e3ea2e60
......@@ -9,3 +9,4 @@ Alex Gaynor <alex.gaynor@gmail.com>
Rob Hudson <rob@cogit8.org>
Carl Meyer <carl@oddbird.net>
Frank Wiles
Jonathan Buchanan
Changelog
=========
master (unreleased)
~~~~~~~~~~~~~~~~~~~
* Smarter tagstring parsing for form field; ported from Jonathan
Buchanan's `django-tagging
<http://django-tagging.googlecode.com>`_. Now supports tags
containing commas. See :ref:`tags-in-forms` for details.
.. _tags-in-forms:
Tags in forms
=============
The ``TaggableManager`` will show up automatically as a field in a
``ModelForm`` or in the admin. Tag input via the form field is parsed
as follows:

* If the input doesn't contain any commas or double quotes, it is simply
  treated as a space-delimited list of tag names.

* If the input does contain either of these characters:

  * Groups of characters which appear between double quotes take
    precedence as multi-word tags (so double quoted tag names may
    contain commas). An unclosed double quote will be ignored.

  * Otherwise, if there are any unquoted commas in the input, it will
    be treated as comma-delimited. If not, it will be treated as
    space-delimited.

Examples:
====================== ======================================= ================================================
Tag input string       Resulting tags                          Notes
====================== ======================================= ================================================
apple ball cat         [``apple``], [``ball``], [``cat``]      No commas, so space delimited
apple, ball cat        [``apple``], [``ball cat``]             Comma present, so comma delimited
"apple, ball" cat dog  [``apple, ball``], [``cat``], [``dog``] All commas are quoted, so space delimited
"apple, ball", cat dog [``apple, ball``], [``cat dog``]        Contains an unquoted comma, so comma delimited
apple "ball cat" dog   [``apple``], [``ball cat``], [``dog``]  No commas, so space delimited
"apple" "ball dog      [``apple``], [``ball``], [``dog``]      Unclosed double quote is ignored
====================== ======================================= ================================================
......@@ -19,3 +19,4 @@ And then to any model you want tagging on do the following::
# ... fields here
tags = TaggableManager()
......@@ -11,9 +11,11 @@ for known issues with older versions of Django), and Python 2.4-2.X.
:maxdepth: 2
getting_started
forms
api
custom_through
issues
changelog
Indices and tables
==================
......
from django import forms
from taggit.utils import parse_tags
from taggit.utils import parse_tags, edit_string_for_tags
class TagWidget(forms.TextInput):
    """
    Text input that displays a set of tagged-item relations as a single
    editable tag string.
    """

    def render(self, name, value, attrs=None):
        """
        Renders the widget.

        ``None`` and string values are handed to the parent widget
        untouched. Any other value must support ``select_related("tag")``
        and yield objects with a ``tag`` attribute; those tags are
        serialised with ``edit_string_for_tags``.
        """
        if value is not None and not isinstance(value, basestring):
            # Serialise exactly once. The older ``", ".join(...)``
            # assignment turned ``value`` into a string first, so the
            # ``select_related`` call below would then have been made on
            # a string.
            value = edit_string_for_tags(
                [o.tag for o in value.select_related("tag")])
        return super(TagWidget, self).render(name, value, attrs)
class TagField(forms.CharField):
......
from unittest import TestCase as UnitTestCase
from django.test import TestCase, TransactionTestCase
from taggit.models import Tag, TaggedItem
from taggit.utils import parse_tags, edit_string_for_tags
from taggit.tests.forms import FoodForm, DirectFoodForm, CustomPKFoodForm
from taggit.tests.models import (Food, Pet, HousePet, DirectFood, DirectPet,
DirectHousePet, TaggedPet, CustomPKFood, CustomPKPet, CustomPKHousePet,
......@@ -208,8 +211,13 @@ class TaggableFormTestCase(BaseTaggingTestCase):
self.assert_tags_equal(raspberry.tags.all(), [])
f = self.form_class(instance=apple)
self.assertEqual(str(f), """<tr><th><label for="id_name">Name:</label></th><td><input id="id_name" type="text" name="name" value="apple" maxlength="50" /></td></tr>\n<tr><th><label for="id_tags">Tags:</label></th><td><input type="text" name="tags" value="green, red, yummy, delicious" id="id_tags" /></td></tr>""")
self.assertEqual(str(f), """<tr><th><label for="id_name">Name:</label></th><td><input id="id_name" type="text" name="name" value="apple" maxlength="50" /></td></tr>\n<tr><th><label for="id_tags">Tags:</label></th><td><input type="text" name="tags" value="delicious green red yummy" id="id_tags" /></td></tr>""")
apple.tags.add('has,comma')
f = self.form_class(instance=apple)
self.assertEqual(str(f), """<tr><th><label for="id_name">Name:</label></th><td><input id="id_name" type="text" name="name" value="apple" maxlength="50" /></td></tr>\n<tr><th><label for="id_tags">Tags:</label></th><td><input type="text" name="tags" value="delicious green red yummy &quot;has,comma&quot;" id="id_tags" /></td></tr>""")
class TaggableFormDirectTestCase(TaggableFormTestCase):
    """
    Re-runs the tests inherited from ``TaggableFormTestCase`` with
    ``DirectFood`` as the model and ``DirectFoodForm`` as the form.
    """
    food_model = DirectFood
    form_class = DirectFoodForm
......@@ -217,3 +225,79 @@ class TaggableFormDirectTestCase(TaggableFormTestCase):
class TaggableFormCustomPKTestCase(TaggableFormTestCase):
    """
    Re-runs the tests inherited from ``TaggableFormTestCase`` with
    ``CustomPKFood`` as the model and ``CustomPKFoodForm`` as the form.
    """
    food_model = CustomPKFood
    form_class = CustomPKFoodForm
class TagStringParseTestCase(UnitTestCase):
    """
    Tests for ``parse_tags`` and ``edit_string_for_tags``.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """

    def test_with_simple_space_delimited_tags(self):
        """ Test with simple space-delimited tags. """
        self.assertEqual(parse_tags('one'), [u'one'])
        self.assertEqual(parse_tags('one two'), [u'one', u'two'])
        self.assertEqual(parse_tags('one two three'),
                         [u'one', u'three', u'two'])
        self.assertEqual(parse_tags('one one two two'), [u'one', u'two'])

    def test_with_comma_delimited_multiple_words(self):
        """ Test with comma-delimited multiple words.
            An unquoted comma in the input will trigger this. """
        self.assertEqual(parse_tags(',one'), [u'one'])
        self.assertEqual(parse_tags(',one two'), [u'one two'])
        self.assertEqual(parse_tags(',one two three'), [u'one two three'])
        self.assertEqual(parse_tags('a-one, a-two and a-three'),
                         [u'a-one', u'a-two and a-three'])

    def test_with_double_quoted_multiple_words(self):
        """ Test with double-quoted multiple words.
            A completed quote will trigger this. Unclosed quotes are
            ignored. """
        self.assertEqual(parse_tags('"one'), [u'one'])
        self.assertEqual(parse_tags('"one two'), [u'one', u'two'])
        self.assertEqual(parse_tags('"one two three'),
                         [u'one', u'three', u'two'])
        self.assertEqual(parse_tags('"one two"'), [u'one two'])
        self.assertEqual(parse_tags('a-one "a-two and a-three"'),
                         [u'a-one', u'a-two and a-three'])

    def test_with_no_loose_commas(self):
        """ Test with no loose commas -- split on spaces. """
        self.assertEqual(parse_tags('one two "thr,ee"'),
                         [u'one', u'thr,ee', u'two'])

    def test_with_loose_commas(self):
        """ Loose commas - split on commas """
        self.assertEqual(parse_tags('"one", two three'),
                         [u'one', u'two three'])

    def test_tags_with_double_quotes_can_contain_commas(self):
        """ Double quotes can contain commas """
        self.assertEqual(parse_tags('a-one "a-two, and a-three"'),
                         [u'a-one', u'a-two, and a-three'])
        self.assertEqual(parse_tags('"two", one, one, two, "one"'),
                         [u'one', u'two'])

    def test_with_naughty_input(self):
        """ Test with naughty input. """
        # Bad users! Naughty users!
        self.assertEqual(parse_tags(None), [])
        self.assertEqual(parse_tags(''), [])
        self.assertEqual(parse_tags('"'), [])
        self.assertEqual(parse_tags('""'), [])
        self.assertEqual(parse_tags('"' * 7), [])
        self.assertEqual(parse_tags(',,,,,,'), [])
        self.assertEqual(parse_tags('",",",",",",","'), [u','])
        self.assertEqual(parse_tags('a-one "a-two" and "a-three'),
                         [u'a-one', u'a-three', u'a-two', u'and'])

    def test_recreation_of_tag_list_string_representations(self):
        """ Tag lists are rendered back into an editable string. """
        # Unsaved instances are enough: edit_string_for_tags only reads
        # ``name``, and this plain unittest case has no transaction to
        # roll back rows it would otherwise leave in the database.
        plain = Tag(name='plain')
        spaces = Tag(name='spa ces')
        comma = Tag(name='com,ma')
        self.assertEqual(edit_string_for_tags([plain]), u'plain')
        self.assertEqual(edit_string_for_tags([plain, spaces]),
                         u'plain, spa ces')
        self.assertEqual(edit_string_for_tags([plain, spaces, comma]),
                         u'plain, spa ces, "com,ma"')
        self.assertEqual(edit_string_for_tags([plain, comma]),
                         u'plain "com,ma"')
        self.assertEqual(edit_string_for_tags([comma, spaces]),
                         u'"com,ma", spa ces')
from django.utils.encoding import force_unicode
from django.utils.functional import wraps
def parse_tags(tagstring):
    """
    Parses tag input, with multiple word input being activated and
    delineated by commas and double quotes. Quotes take precedence, so
    they may contain commas.

    ``tagstring`` may be ``None`` or empty, in which case an empty list
    is returned. Otherwise it is coerced with ``force_unicode`` before
    parsing.

    Returns a sorted list of unique tag names.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    if not tagstring:
        return []

    tagstring = force_unicode(tagstring)

    # Special case - if there are no commas or double quotes in the
    # input, we know we only need to split on spaces.
    if u',' not in tagstring and u'"' not in tagstring:
        return sorted(set(split_strip(tagstring, u' ')))

    words = []
    pending = []  # characters collected since the last quote boundary
    # Defer splitting of non-quoted sections until we know if there are
    # any unquoted commas.
    to_be_split = []
    saw_loose_comma = False
    open_quote = False

    # Both loops below draw from the same iterator, so the inner loop
    # consumes the quoted section and the outer loop resumes after it.
    chars = iter(tagstring)
    for c in chars:
        if c == u'"':
            if pending:
                to_be_split.append(u''.join(pending))
                pending = []
            # Find the matching quote
            open_quote = True
            for c in chars:
                if c == u'"':
                    break
                pending.append(c)
            else:
                # Input ended before the quote was closed; leave
                # ``pending`` and ``open_quote`` as they are so the text
                # is handled as unquoted below.
                break
            if pending:
                word = u''.join(pending).strip()
                if word:
                    words.append(word)
                pending = []
            open_quote = False
        else:
            if not saw_loose_comma and c == u',':
                saw_loose_comma = True
            pending.append(c)

    # If we were parsing an open quote which was never closed treat
    # the remaining characters as unquoted.
    if pending:
        if open_quote and u',' in pending:
            saw_loose_comma = True
        to_be_split.append(u''.join(pending))

    if to_be_split:
        if saw_loose_comma:
            delimiter = u','
        else:
            delimiter = u' '
        for chunk in to_be_split:
            words.extend(split_strip(chunk, delimiter))

    return sorted(set(words))
def split_strip(string, delimiter=u','):
    """
    Breaks ``string`` apart at every ``delimiter``, trims surrounding
    whitespace from each piece and returns the pieces that are not
    empty, in their original order. A false ``string`` gives ``[]``.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    if not string:
        return []

    pieces = []
    for raw in string.split(delimiter):
        cleaned = raw.strip()
        if cleaned:
            pieces.append(cleaned)
    return pieces
def edit_string_for_tags(tags):
    """
    Given list of ``Tag`` instances, creates a string representation of
    the list suitable for editing by the user, such that submitting the
    given string representation back without changing it will give the
    same list of tags.

    Only the ``name`` attribute of each item is read.

    Tag names which contain commas will be double quoted.

    If any tag name which isn't being quoted contains whitespace, the
    resulting string of tag names will be comma-delimited, otherwise
    it will be space-delimited.

    A single name that contains a space is double quoted as well: with
    nothing to join it to, no comma would appear in the output and the
    name would be read back as several space-delimited tags.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    names = []
    use_commas = False
    for tag in tags:
        name = tag.name
        if u',' in name:
            names.append('"%s"' % name)
            continue
        if u' ' in name:
            use_commas = True
        names.append(name)

    # ``use_commas`` is only set by an unquoted name, so when exactly
    # one name was collected it is the space-containing one.
    if use_commas and len(names) == 1:
        return '"%s"' % names[0]

    if use_commas:
        glue = u', '
    else:
        glue = u' '
    return glue.join(names)
def require_instance_manager(func):
@wraps(func)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment