I have this code for populating a table.
def add_tags(count):
    """Create ``count`` tags and attach one randomly chosen photo to each.

    The tags are named ``tag0`` .. ``tag<count - 1>`` and are inserted with a
    single ``bulk_create``. Afterwards every tag is linked to one photo; that
    second pass issues several queries per tag.
    """
    print("Add tags")

    # Smallest and largest primary keys currently present in the Photo table.
    lowest_photo_pk = Photo.objects.all().order_by("id")[0].pk
    highest_photo_pk = Photo.objects.all().order_by("-id")[0].pk

    Tag.objects.bulk_create([Tag(tag='tag' + str(n)) for n in range(count)])

    for n in range(count):
        # NOTE(review): the pk is drawn from the whole [lowest, highest] range,
        # so this lookup assumes every pk in between exists -- confirm.
        photo = Photo.objects.get(pk=randint(lowest_photo_pk, highest_photo_pk))
        # The tag is fetched again by name before the photo is attached to it.
        Tag.objects.get(tag='tag' + str(n)).photos.add(photo)
And this is the model:
class Tag(models.Model):
    """A tag with a unique text label that can be attached to many photos."""

    # Tag text: at most 20 characters, unique across all tags.
    tag = models.CharField(max_length=20, unique=True)
    # Many-to-many link to the photos carrying this tag.
    photos = models.ManyToManyField(Photo)
As I understand from this answer ("Django: invalid keyword argument for this function"), I have to save the tag objects first (because of the ManyToMany field) and then attach photos to them through add(). But for a large count this process takes too long. Is there any way to refactor this code to make it faster?
In general I want to populate Tag model with random dummy data.
EDIT 1 (model for photo)
class Photo(models.Model):
    """An uploaded image that belongs to a user."""

    # Image file; uploads go to the "images" location.
    photo = models.ImageField(upload_to="images")
    # Creation timestamp. auto_now_add sets it once, when the object is first
    # saved; auto_now would overwrite it on every save ("last modified").
    created_date = models.DateTimeField(auto_now_add=True)
    # Owner of the photo. CASCADE is spelled out because on_delete is a
    # required argument from Django 2.0 on; it was the implicit default before.
    user = models.ForeignKey(User, on_delete=models.CASCADE)

    def __unicode__(self):
        """Return the name of the stored image file."""
        return self.photo.name
A ManyToMany field is used when a model needs to reference multiple instances of another model. Use cases include: A user needs to assign multiple categories to a blog post. A user wants to add multiple blog posts to a publication.
The add() function allows us to add an object to this ManyToManyField. And this is how to add an object to a ManyToManyField in Django.
TL;DR Use the "through" model to bulk insert m2m relationships.
"Tag.photos.through" => the model Django generates for the relation, with 3 fields [ id, photo, tag ]
# Build link rows on the auto-generated through model; nothing is saved yet.
photo_tag_1 = Tag.photos.through(photo_id=1, tag_id=1)
photo_tag_2 = Tag.photos.through(photo_id=1, tag_id=2)
# The manager method is bulk_create (there is no bulk_insert). Add any
# further link objects to the list before making the call.
Tag.photos.through.objects.bulk_create([photo_tag_1, photo_tag_2])
This is the fastest way that I know of; I use it all the time to create test data. I can generate millions of records in minutes.
Edit from Georgy:
def add_tags(count):
    """Create ``count`` tags, then give every photo a random subset of tags.

    Tags named ``tag0`` .. ``tag<count - 1>`` are inserted in one
    ``bulk_create``. For each photo a random number of tag ids (possibly
    zero, possibly all) is picked and the links are inserted directly into
    the through table, one ``bulk_create`` call per photo.
    """
    new_tags = [Tag(tag='tag%s' % n) for n in range(count)]
    Tag.objects.bulk_create(new_tags)

    # Ids of every tag in the table (not only the ones created above),
    # materialized as a list so it can be shuffled in place.
    all_tag_ids = list(Tag.objects.values_list('id', flat=True))
    total_tags = len(all_tag_ids)

    # through is the model generated by django to link m2m between tag and photo
    link_model = Tag.photos.through

    for photo_id in Photo.objects.values_list('id', flat=True):
        # Shuffle, then keep a random-length prefix: a random subset of tags.
        shuffle(all_tag_ids)
        how_many = randint(0, total_tags)
        links = [
            link_model(tag_id=tag_id, photo_id=photo_id)
            for tag_id in all_tag_ids[:how_many]
        ]
        link_model.objects.bulk_create(links, batch_size=7000)
I didn't create the models to test this, but the structure is there; you might have to tweak a few things to make it work. Let me know if you run into any problems.
[edited]
As shown in Du D's answer, a Django ManyToMany field is backed by an intermediate table whose model is available as the field's through attribute. It contains three columns: the ID of the relation, the ID of the object linked to and the ID of the object linked from. You can use bulk_create on the through model to bulk create ManyToMany relations.
As a quick example, you could bulk create Tag to Photo relations like this:
# Fetch the two tags and the two photos that will be linked.
tag1 = Tag.objects.get(id=1)
tag2 = Tag.objects.get(id=2)
photo1 = Photo.objects.get(id=1)
photo2 = Photo.objects.get(id=2)

# One unsaved through-model row per (photo, tag) pair.
through_objs = [
    Tag.photos.through(photo_id=photo.id, tag_id=tag.id)
    for photo, tag in (
        (photo1, tag1),
        (photo1, tag2),
        (photo2, tag2),
    )
]

# Insert all link rows with a single bulk_create call.
Tag.photos.through.objects.bulk_create(through_objs)
Here is a general solution that you can run to set up ManyToMany relations between any list of object pairs.
from typing import Iterable
from collections import namedtuple
# A pair of objects to be linked: ``from_object`` and ``to_object``.
ManyToManySpec = namedtuple("ManyToManySpec", "from_object to_object")
def bulk_create_manytomany_relations(
    model_from,
    field_name: str,
    model_from_name: str,
    model_to_name: str,
    specs: Iterable[ManyToManySpec]
):
    """Bulk-insert ManyToMany links through the field's ``through`` model.

    Args:
        model_from: Object whose ``field_name`` attribute exposes ``through``.
        field_name: Name of that attribute on ``model_from``.
        model_from_name: Lower-cased and suffixed with ``_id`` to form the
            through-model keyword that receives ``spec.from_object.id``.
        model_to_name: Lower-cased and suffixed with ``_id`` to form the
            through-model keyword that receives ``spec.to_object.id``.
        specs: The pairs of objects to link.
    """
    through_model = getattr(model_from, field_name).through
    from_key = f"{model_from_name.lower()}_id"
    to_key = f"{model_to_name.lower()}_id"

    # One unsaved through-model instance per requested pair.
    links = [
        through_model(
            **{from_key: spec.from_object.id, to_key: spec.to_object.id}
        )
        for spec in specs
    ]
    through_model.objects.bulk_create(links)
# Fetch the tags and photos to link, in the same order as separate lookups.
tag1, tag2 = Tag.objects.get(id=1), Tag.objects.get(id=2)
photo1, photo2 = Photo.objects.get(id=1), Photo.objects.get(id=2)

# Each spec pairs a tag with a photo it should be linked to.
tag_photo_specs = [
    ManyToManySpec(from_object=tag1, to_object=photo1),
    ManyToManySpec(from_object=tag1, to_object=photo2),
    ManyToManySpec(from_object=tag2, to_object=photo2),
]

bulk_create_manytomany_relations(
    model_from=Tag,
    field_name="photos",
    model_from_name="tag",
    model_to_name="photo",
    specs=tag_photo_specs,
)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With