Skip to content

Commit 62e394f

Browse files
committed
Merge branch 'qihui/store_many_vectors' of git://github.com/xieqihui/NearPy into xieqihui-qihui/store_many_vectors
2 parents 9e1dfbd + efb6d13 commit 62e394f

5 files changed

Lines changed: 77 additions & 0 deletions

File tree

nearpy/engine.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,22 @@ def store_vector(self, v, data=None):
9696
self.storage.store_vector(lshash.hash_name, bucket_key,
9797
nv, data)
9898

99+
def store_many_vectors(self, vs, data=None):
    """
    Store a batch of vectors.

    Hashes every vector in vs and stores its normalized form in all
    matching buckets in the storage, issuing one batched storage call per
    hash.

    vs may be any iterable of vectors (including a one-shot iterator).
    The data argument must be either None or an iterable holding one
    JSON-serializable object per vector. Each object is stored with its
    vector and will be returned in search results.
    """
    # Both inputs are walked more than once (normalization, then once per
    # hash), so one-shot iterators have to be materialized up front.
    vs = list(vs)
    payloads = [None] * len(vs) if data is None else list(data)
    # We will store the normalized vectors (used during retrieval)
    nvs = [unitvec(v) for v in vs]
    # Store the vectors in each bucket of all hashes
    for lshash in self.lshashes:
        bucket_keys = []
        bucket_vectors = []
        bucket_data = []
        for v, nv, payload in zip(vs, nvs, payloads):
            # hash_vector returns a sequence of bucket keys; file the
            # vector under every one of them, not only the first.
            for bucket_key in lshash.hash_vector(v):
                bucket_keys.append(bucket_key)
                bucket_vectors.append(nv)
                bucket_data.append(payload)
        # Keep handing None through when the caller supplied no data, so
        # the storage backend applies its own "no data" handling.
        self.storage.store_many_vectors(
            lshash.hash_name, bucket_keys, bucket_vectors,
            None if data is None else bucket_data)
114+
99115
def delete_vector(self, data, v=None):
100116
"""
101117
Deletes vector v and its id (data) in all matching buckets in the storage.

nearpy/storage/storage.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ def store_vector(self, hash_name, bucket_key, v, data):
3030
"""
3131
raise NotImplementedError
3232

33+
def store_many_vectors(self, hash_name, bucket_keys, vs, data):
    """
    Batch counterpart of store_vector.

    Intended to store each vector, together with its JSON-serializable
    data, in the bucket named by the corresponding bucket key. This base
    version only raises NotImplementedError; storage backends are
    expected to override it.
    """
    raise NotImplementedError()
39+
3340
def get_all_bucket_keys(self, hash_name):
3441
"""
3542
Returns all bucket keys for the given hash as iterable of strings

nearpy/storage/storage_memory.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
# THE SOFTWARE.
2929

3030
from future.utils import viewkeys
31+
from future.builtins import zip
3132
from nearpy.storage.storage import Storage
33+
import itertools
3234

3335

3436
class MemoryStorage(Storage):
@@ -50,6 +52,16 @@ def store_vector(self, hash_name, bucket_key, v, data):
5052
self.buckets[hash_name][bucket_key] = []
5153
self.buckets[hash_name][bucket_key].append((v, data))
5254

55+
def store_many_vectors(self, hash_name, bucket_keys, vs, data):
    """
    Store a batch of vectors in memory.

    Pairs up vs, bucket_keys and data positionally and passes each
    triple to store_vector. When data is None, every vector is stored
    with None as its data.
    """
    # An endless stream of None lets zip stop at the shorter of the two
    # real inputs when the caller supplied no data.
    payloads = itertools.repeat(None) if data is None else data
    for vector, key, payload in zip(vs, bucket_keys, payloads):
        self.store_vector(hash_name, key, vector, payload)
64+
5365
def get_all_bucket_keys(self, hash_name):
5466
return viewkeys(self.buckets[hash_name])
5567

nearpy/storage/storage_redis.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@ def store_vector(self, hash_name, bucket_key, v, data):
5050
"""
5151
Stores vector and JSON-serializable data in bucket with specified key.
5252
"""
53+
self._add_vector(hash_name, bucket_key, v, data, self.redis_object)
54+
55+
def store_many_vectors(self, hash_name, bucket_keys, vs, data):
    """
    Store a batch of vectors in Redis.

    Queues one _add_vector call per (bucket key, data, vector) triple on
    a single pipeline obtained from self.redis_object, then executes the
    pipeline once. When data is None, every vector is stored with None
    as its data.
    """
    with self.redis_object.pipeline() as pipe:
        payloads = [None] * len(vs) if data is None else data
        for key, payload, vector in zip(bucket_keys, payloads, vs):
            self._add_vector(hash_name, key, vector, payload, pipe)
        pipe.execute()
66+
67+
def _add_vector(self, hash_name, bucket_key, v, data, redis_object):
68+
'''
69+
Store vector and JSON-serializable data in bucket with specified key.
70+
'''
5371
redis_key = self._format_redis_key(hash_name, bucket_key)
5472

5573
val_dict = {}

tests/storage_tests.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from mockredis import MockRedis as Redis
2828

2929
from future.builtins import range
30+
from future.builtins import zip
3031

3132
from nearpy.storage import MemoryStorage, RedisStorage
3233

@@ -55,6 +56,24 @@ def check_store_vector(self, x):
5556
self.storage.clean_all_buckets()
5657
self.assertEqual(self.storage.get_bucket('testHash', bucket_key), [])
5758

59+
def check_store_many_vectors(self, xs):
    """
    Round-trip a batch of vectors through self.storage.

    Stores every vector of xs under its own bucket key with its index as
    data, checks that each bucket then holds exactly that vector and
    data, and finally checks that clean_all_buckets empties every one of
    those buckets.
    """
    num_vector = len(xs)
    first_key = 10000000
    bucket_keys = [str(key)
                   for key in range(first_key, first_key + num_vector)]
    x_data = list(range(num_vector))
    self.storage.store_many_vectors('testHash', bucket_keys, xs, x_data)
    for bucket_key, x, data in zip(bucket_keys, xs, x_data):
        bucket = self.storage.get_bucket('testHash', bucket_key)
        self.assertEqual(len(bucket), 1)
        y, y_data = bucket[0]
        self.assertEqual(type(y), type(x))
        self.assertEqual(y.shape, x.shape)
        self.assertEqual(max(abs(y - x)), 0)
        self.assertEqual(y_data, data)
    self.storage.clean_all_buckets()
    # Check every bucket that was written, not just the last one, so a
    # partial clean is caught; this also keeps an empty batch from
    # touching an unbound loop variable.
    for bucket_key in bucket_keys:
        self.assertEqual(self.storage.get_bucket('testHash', bucket_key), [])
76+
5877
def check_get_all_bucket_keys(self):
5978
x, x_data = numpy.ones(100), "data"
6079
hash_config = [
@@ -135,5 +154,10 @@ def test_store_zero(self):
135154
_, data = bucket[0]
136155
self.assertEqual(data, 0)
137156

157+
def test_store_many_vectors(self):
    # Exercise the batch-store check with 100 random 10-dimensional vectors.
    self.check_store_many_vectors(numpy.random.randn(100, 10))
160+
161+
138162
if __name__ == '__main__':
139163
unittest.main()

0 commit comments

Comments
 (0)