Spaces:
Running on Zero
Running on Zero
Commit ·
e6f20b8
1
Parent(s): 54e04d2
app building
Browse files- app.py +13 -6
- asmk-src/LICENSE +21 -0
- asmk-src/README.md +65 -0
- asmk-src/asmk.egg-info/PKG-INFO +6 -0
- asmk-src/asmk.egg-info/SOURCES.txt +19 -0
- asmk-src/asmk.egg-info/dependency_links.txt +1 -0
- asmk-src/asmk.egg-info/requires.txt +2 -0
- asmk-src/asmk.egg-info/top_level.txt +1 -0
- asmk-src/asmk.egg-info/zip-safe +1 -0
- asmk-src/asmk/__init__.py +6 -0
- asmk-src/asmk/asmk_method.py +232 -0
- asmk-src/asmk/codebook.py +85 -0
- asmk-src/asmk/functional.py +15 -0
- asmk-src/asmk/index.py +51 -0
- asmk-src/asmk/inverted_file.py +154 -0
- asmk-src/asmk/io_helpers.py +128 -0
- asmk-src/asmk/kernel.py +85 -0
- asmk-src/cython/build.sh +1 -0
- asmk-src/cython/hamming.c +0 -0
- asmk-src/cython/hamming.pyx +152 -0
- asmk-src/examples/demo_how.py +137 -0
- asmk-src/examples/params/_eccv20_how.yml +40 -0
- asmk-src/examples/params/eccv20_how_r18_1000.yml +7 -0
- asmk-src/examples/params/eccv20_how_r50-_1000.yml +7 -0
- asmk-src/examples/params/eccv20_how_r50-_2000.yml +7 -0
- asmk-src/setup.py +35 -0
- asmk-src/test/test_hamming.py +54 -0
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import spaces
|
| 2 |
import subprocess
|
| 3 |
-
import sys
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
''' loading modules '''
|
|
@@ -13,21 +13,28 @@ if not CKPT.exists():
|
|
| 13 |
subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "./sam2-src[notebooks]"], cwd=ROOT)
|
| 14 |
subprocess.check_call(["bash", "download_ckpts.sh"], cwd=SAM2 / "checkpoints")
|
| 15 |
try:
|
| 16 |
-
import asmk # noqa: F401
|
| 17 |
-
except
|
| 18 |
subprocess.check_call(
|
| 19 |
-
[
|
| 20 |
)
|
| 21 |
subprocess.check_call(
|
| 22 |
-
[sys.executable, "-m", "pip", "install", './asmk', "--no-build-isolation"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
)
|
|
|
|
| 24 |
import gradio as gr
|
| 25 |
import torch
|
| 26 |
torch.no_grad().__enter__()
|
| 27 |
import numpy as np
|
| 28 |
from PIL import Image, ImageDraw
|
| 29 |
import cv2
|
| 30 |
-
import os
|
| 31 |
import copy
|
| 32 |
import json
|
| 33 |
import logging
|
|
|
|
| 1 |
import spaces
|
| 2 |
import subprocess
|
| 3 |
+
import sys, os
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
''' loading modules '''
|
|
|
|
| 13 |
subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "./sam2-src[notebooks]"], cwd=ROOT)
|
| 14 |
subprocess.check_call(["bash", "download_ckpts.sh"], cwd=SAM2 / "checkpoints")
|
| 15 |
try:
|
| 16 |
+
import asmk.index # noqa: F401
|
| 17 |
+
except Exception as e:
|
| 18 |
subprocess.check_call(
|
| 19 |
+
["cythonize", "*.pyx"], cwd='./asmk-src/cython'
|
| 20 |
)
|
| 21 |
subprocess.check_call(
|
| 22 |
+
[sys.executable, "-m", "pip", "install", './asmk-src', "--no-build-isolation"]
|
| 23 |
+
)
|
| 24 |
+
if not os.path.exists('./private'):
|
| 25 |
+
from huggingface_hub import snapshot_download
|
| 26 |
+
local_dir = snapshot_download(
|
| 27 |
+
repo_id="nycu-cplab/3AM",
|
| 28 |
+
local_dir="./private",
|
| 29 |
+
repo_type="model",
|
| 30 |
)
|
| 31 |
+
|
| 32 |
import gradio as gr
|
| 33 |
import torch
|
| 34 |
torch.no_grad().__enter__()
|
| 35 |
import numpy as np
|
| 36 |
from PIL import Image, ImageDraw
|
| 37 |
import cv2
|
|
|
|
| 38 |
import copy
|
| 39 |
import json
|
| 40 |
import logging
|
asmk-src/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2020 Tomas Jenicek, Giorgos Tolias
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
asmk-src/README.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python ASMK (Aggregated Selective Match Kernels)
|
| 2 |
+
|
| 3 |
+
This is a Python implementation of the ASMK approach published in [ICCV 2013](http://hal.inria.fr/docs/00/86/46/84/PDF/iccv13_tolias.pdf):
|
| 4 |
+
|
| 5 |
+
```
|
| 6 |
+
@InProceedings{TAJ13,
|
| 7 |
+
author = "Giorgos Tolias and Yannis Avrithis and Herv\'e J\'egou",
|
| 8 |
+
title = "To aggregate or not to aggregate: Selective match kernels for image search",
|
| 9 |
+
booktitle = "IEEE International Conference on Computer Vision",
|
| 10 |
+
year = "2013"
|
| 11 |
+
}
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
This package is provided to support image retrieval with local descriptors and to reproduce the results of our [ECCV 2020 paper](https://arxiv.org/abs/2007.13172) with HOW deep local descriptors:
|
| 15 |
+
|
| 16 |
+
```
|
| 17 |
+
@InProceedings{TJ20,
|
| 18 |
+
author = "Giorgos Tolias and Tomas Jenicek and Ond\v{r}ej Chum}",
|
| 19 |
+
title = "Learning and aggregating deep local descriptors for instance-level recognition",
|
| 20 |
+
booktitle = "European Conference on Computer Vision",
|
| 21 |
+
year = "2020"
|
| 22 |
+
}
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
There are minor differences compared to the original ASMK approach (ICCV'13) and [implementation](https://github.com/gtolias/asmk), which are described in our ECCV'20 paper. Using the provided package to run ASMK with other local descriptors is straightforward.
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
## Running the Code
|
| 30 |
+
|
| 31 |
+
1. Install the requirements (`faiss-cpu` for cpu-only setup)
|
| 32 |
+
|
| 33 |
+
```
|
| 34 |
+
pip3 install pyaml numpy faiss-gpu
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
2. Build C library for your Python version
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
python3 setup.py build_ext --inplace
|
| 42 |
+
rm -r build
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
3. Download `cirtorch` and add it to your `PYTHONPATH`
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
wget "https://github.com/filipradenovic/cnnimageretrieval-pytorch/archive/master.zip"
|
| 50 |
+
unzip master.zip
|
| 51 |
+
rm master.zip
|
| 52 |
+
export PYTHONPATH=${PYTHONPATH}:$(realpath cnnimageretrieval-pytorch-master)
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
4. Run `examples/demo_how.py` giving it any `.yaml` parameter file from `examples/params/*.yml`
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
### Reproducing ECCV 2020 results with HOW local descriptors
|
| 60 |
+
|
| 61 |
+
Reproducing results from **Table 2.**
|
| 62 |
+
|
| 63 |
+
- R18<sub>how</sub> (n = 1000): `examples/demo_how.py eccv20_how_r18_1000`   _ROxf (M): 75.1, RPar (M): 79.4_
|
| 64 |
+
- -R50<sub>how</sub> (n = 1000): `examples/demo_how.py eccv20_how_r50-_1000`   _ROxf (M): 78.3, RPar (M): 80.1_
|
| 65 |
+
- -R50<sub>how</sub> (n = 2000): `examples/demo_how.py eccv20_how_r50-_2000`   _ROxf (M): 79.4, RPar (M): 81.6_
|
asmk-src/asmk.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.1
|
| 2 |
+
Name: asmk
|
| 3 |
+
Version: 0.1
|
| 4 |
+
Summary: ASMK Python implementation for ECCV'20 paper "Learning and aggregating deep local descriptors for instance-level recognition"
|
| 5 |
+
Author: Tomas Jenicek, Giorgos Tolias
|
| 6 |
+
License-File: LICENSE
|
asmk-src/asmk.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LICENSE
|
| 2 |
+
README.md
|
| 3 |
+
setup.py
|
| 4 |
+
asmk/__init__.py
|
| 5 |
+
asmk/asmk_method.py
|
| 6 |
+
asmk/codebook.py
|
| 7 |
+
asmk/functional.py
|
| 8 |
+
asmk/index.py
|
| 9 |
+
asmk/inverted_file.py
|
| 10 |
+
asmk/io_helpers.py
|
| 11 |
+
asmk/kernel.py
|
| 12 |
+
asmk.egg-info/PKG-INFO
|
| 13 |
+
asmk.egg-info/SOURCES.txt
|
| 14 |
+
asmk.egg-info/dependency_links.txt
|
| 15 |
+
asmk.egg-info/requires.txt
|
| 16 |
+
asmk.egg-info/top_level.txt
|
| 17 |
+
asmk.egg-info/zip-safe
|
| 18 |
+
cython/hamming.c
|
| 19 |
+
test/test_hamming.py
|
asmk-src/asmk.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
asmk-src/asmk.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
+
pyaml
|
asmk-src/asmk.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
asmk
|
asmk-src/asmk.egg-info/zip-safe
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
asmk-src/asmk/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Python implementation of ASMK for ECCV 2020 paper "Learning and aggregating deep local descriptors
|
| 3 |
+
for instance-level recognition"
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from .asmk_method import ASMKMethod
|
asmk-src/asmk/asmk_method.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A wrapper around all asmk-related packages for convenient use"""
|
| 2 |
+
|
| 3 |
+
import os.path
|
| 4 |
+
import time
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
from . import kernel as kern_pkg, codebook as cdb_pkg, index as idx_pkg, inverted_file as ivf_pkg
|
| 8 |
+
from . import io_helpers
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ASMKMethod:
|
| 12 |
+
"""
|
| 13 |
+
Class to keep necessary objects and provide easy access to asmk method's steps. Each step
|
| 14 |
+
of asmk method corresponds to one method. Use initialize_untrained() class method instead
|
| 15 |
+
of directly calling the constructor.
|
| 16 |
+
|
| 17 |
+
:param dict params: contains keys index, train_codebook, build_ivf and query_ivf, each
|
| 18 |
+
containing the corresponding step parameters
|
| 19 |
+
:param dict metadata: only stored by this object, never changed
|
| 20 |
+
:param Codebook codebook: object from the codebook module
|
| 21 |
+
:param ASMKKernel kernel: object from the kernel module
|
| 22 |
+
:param IVF inverted_file: object from the inverted_file module
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
def __init__(self, params, metadata, *, codebook=None, kernel=None, inverted_file=None):
|
| 26 |
+
self.params = params
|
| 27 |
+
self.metadata = metadata
|
| 28 |
+
|
| 29 |
+
self.codebook = codebook
|
| 30 |
+
self.kernel = kernel
|
| 31 |
+
self.inverted_file = inverted_file
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@classmethod
|
| 35 |
+
def initialize_untrained(cls, params):
|
| 36 |
+
"""Initialize asmk method before training, provided only params (see constructor docs)"""
|
| 37 |
+
return cls(params, {})
|
| 38 |
+
|
| 39 |
+
#
|
| 40 |
+
# Method steps
|
| 41 |
+
#
|
| 42 |
+
|
| 43 |
+
def train_codebook(self, *columns, cache_path=None, step_params=None):
|
| 44 |
+
"""The first step of the method - training the codebook (or loading from cache)
|
| 45 |
+
|
| 46 |
+
:param ndarray vecs: 2D numpy array, rows are vectors for codebook training
|
| 47 |
+
:param str cache_path: trained codebook will be stored under given file path and loaded
|
| 48 |
+
next time without training (None to turn off)
|
| 49 |
+
:param dict step_params: parameters that will override stored parameters for this step
|
| 50 |
+
(self.params['train_codebook'])
|
| 51 |
+
:return: new ASMKMethod object (containing metadata of this step), do not change self
|
| 52 |
+
"""
|
| 53 |
+
assert not self.codebook, "Codebook already trained"
|
| 54 |
+
index_factory = idx_pkg.initialize_index(**self.params['index'])
|
| 55 |
+
step_params = step_params or self.params.get("train_codebook")
|
| 56 |
+
|
| 57 |
+
if cache_path and os.path.exists(cache_path):
|
| 58 |
+
time0 = time.time()
|
| 59 |
+
cdb = cdb_pkg.Codebook.initialize_from_state(io_helpers.load_pickle(cache_path),
|
| 60 |
+
index_factory=index_factory)
|
| 61 |
+
cdb.index()
|
| 62 |
+
assert cdb.params == step_params['codebook']
|
| 63 |
+
metadata = {"load_time": time.time() - time0}
|
| 64 |
+
else:
|
| 65 |
+
cdb = cdb_pkg.Codebook(**step_params['codebook'], index_factory=index_factory)
|
| 66 |
+
metadata = cdb.train(*columns)
|
| 67 |
+
if cache_path:
|
| 68 |
+
io_helpers.save_pickle(cache_path, cdb.state_dict())
|
| 69 |
+
|
| 70 |
+
metadata["index_class"] = index_factory.__class__.__name__
|
| 71 |
+
return self.__class__({**self.params, "train_codebook": step_params},
|
| 72 |
+
{**self.metadata, "train_codebook": metadata},
|
| 73 |
+
codebook=cdb)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def build_ivf(self, *columns, distractors_path=None, cache_path=None, step_params=None):
|
| 77 |
+
"""The second step of the method - building the ivf (or loading from cache)
|
| 78 |
+
|
| 79 |
+
:param ndarray vecs: 2D numpy array, rows are vectors to be indexed by the ivf
|
| 80 |
+
:param ndarray imids: 1D numpy array of image ids corresponding to 'vecs'
|
| 81 |
+
:param str distractors_path: ivf will be initialized with given distractors ivf path
|
| 82 |
+
:param str cache_path: built ivf will be stored under given file path and loaded
|
| 83 |
+
next time without training (None to turn off)
|
| 84 |
+
:param dict step_params: parameters that will override stored parameters for this step
|
| 85 |
+
(self.params['build_ivf'])
|
| 86 |
+
:return: new ASMKMethod object (containing metadata of this step), do not change self
|
| 87 |
+
"""
|
| 88 |
+
|
| 89 |
+
builder = self.create_ivf_builder(cache_path=cache_path, step_params=step_params)
|
| 90 |
+
|
| 91 |
+
# Skip if loaded, otherwise add everything at once
|
| 92 |
+
if not builder.loaded_from_cache:
|
| 93 |
+
if distractors_path:
|
| 94 |
+
builder.initialize_with_distractors(distractors_path)
|
| 95 |
+
builder.add(*columns)
|
| 96 |
+
|
| 97 |
+
return self.add_ivf_builder(builder)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def create_ivf_builder(self, *, cache_path=None, step_params=None):
|
| 101 |
+
"""Part of the second step of the method, see build_ivf() method implementation for usage
|
| 102 |
+
|
| 103 |
+
:param str cache_path: built ivf will be stored under given file path and loaded
|
| 104 |
+
next time without training (None to turn off)
|
| 105 |
+
:param dict step_params: parameters that will override stored parameters for this step
|
| 106 |
+
(self.params['build_ivf'])
|
| 107 |
+
:return: IvfBuilder object
|
| 108 |
+
"""
|
| 109 |
+
assert not self.kernel and not self.inverted_file, "Inverted file already built"
|
| 110 |
+
step_params = step_params or self.params.get("build_ivf")
|
| 111 |
+
kern = kern_pkg.ASMKKernel(self.codebook, **step_params['kernel'])
|
| 112 |
+
|
| 113 |
+
return IvfBuilder(step_params, self.codebook, kern, cache_path=cache_path)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def add_ivf_builder(self, ivf_builder):
|
| 117 |
+
"""Part of the second step of the method, see build_ivf() method implementation for usage
|
| 118 |
+
|
| 119 |
+
:param IvfBuilder ivf_builder: Builder with vectors added
|
| 120 |
+
:return: new ASMKMethod object (containing metadata from the builder), do not change self
|
| 121 |
+
"""
|
| 122 |
+
ivf_metadata = ivf_builder.save()
|
| 123 |
+
|
| 124 |
+
return self.__class__({**self.params, "build_ivf": ivf_builder.step_params},
|
| 125 |
+
{**self.metadata, "build_ivf": ivf_metadata},
|
| 126 |
+
codebook=self.codebook, kernel=ivf_builder.kernel,
|
| 127 |
+
inverted_file=ivf_builder.ivf)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def query_ivf(self, *columns, step_params=None, progress=None):
|
| 131 |
+
"""The last step of the method - querying the ivf
|
| 132 |
+
|
| 133 |
+
:param ndarray qvecs: 2D numpy array, rows are vectors, each acting as a query for the ivf
|
| 134 |
+
:param ndarray qimids: 1D numpy array of image ids corresponding to 'qvecs'
|
| 135 |
+
:param dict step_params: parameters that will override stored parameters for this step
|
| 136 |
+
(self.params['query_ivf'])
|
| 137 |
+
:param bool progress: step at which update progress printing (None to disable)
|
| 138 |
+
:return: tuple (dict metadata, ndarray images, 2D ndarray ranks, 2D ndarray scores), do not
|
| 139 |
+
change self
|
| 140 |
+
"""
|
| 141 |
+
|
| 142 |
+
step_params = step_params or self.params.get("query_ivf")
|
| 143 |
+
|
| 144 |
+
time0 = time.time()
|
| 145 |
+
images, ranks, scores = self.accumulate_scores(self.codebook, self.kernel, \
|
| 146 |
+
self.inverted_file, *columns, params=step_params, progress=progress)
|
| 147 |
+
metadata = {"query_avg_time": (time.time()-time0)/len(ranks)}
|
| 148 |
+
return metadata, images, ranks, scores
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
#
|
| 152 |
+
# Helper functions
|
| 153 |
+
#
|
| 154 |
+
|
| 155 |
+
@staticmethod
|
| 156 |
+
def accumulate_scores(cdb, kern, ivf, qvecs, qimids, *cols, params, progress=None):
|
| 157 |
+
"""Accumulate scores for every query image (qvecs, qimids) given codebook, kernel,
|
| 158 |
+
inverted_file and parameters."""
|
| 159 |
+
similarity_func = lambda *x: kern.similarity(*x, **params["similarity"])
|
| 160 |
+
|
| 161 |
+
acc = []
|
| 162 |
+
slices = list(io_helpers.slice_unique(qimids))
|
| 163 |
+
for imid, seq in io_helpers.progress(slices, frequency=progress, header="Query"):
|
| 164 |
+
quantized = cdb.quantize(qvecs[seq], *(x[seq] for x in cols), **params["quantize"])
|
| 165 |
+
aggregated = kern.aggregate_image(*quantized, **params["aggregate"])
|
| 166 |
+
ranks, scores = ivf.search(*aggregated, **params["search"], similarity_func=similarity_func)
|
| 167 |
+
acc.append((imid, ranks, scores))
|
| 168 |
+
|
| 169 |
+
imids_all, ranks_all, scores_all = zip(*acc)
|
| 170 |
+
return np.array(imids_all), np.vstack(ranks_all), np.vstack(scores_all)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
class IvfBuilder:
|
| 174 |
+
"""Inverted file (IVF) wrapper simplifying vector addition
|
| 175 |
+
|
| 176 |
+
:param dict step_params: contains parameters for build_ivf step
|
| 177 |
+
:param Codebook codebook: object from the codebook module
|
| 178 |
+
:param ASMKKernel kernel: object from the kernel module
|
| 179 |
+
:param str cache_path: built ivf will be stored under given file path and loaded
|
| 180 |
+
next time without training (None to turn off)
|
| 181 |
+
"""
|
| 182 |
+
|
| 183 |
+
def __init__(self, step_params, codebook, kernel, *, cache_path):
|
| 184 |
+
self.step_params = step_params
|
| 185 |
+
self.codebook = codebook
|
| 186 |
+
self.kernel = kernel
|
| 187 |
+
|
| 188 |
+
if cache_path and os.path.exists(cache_path):
|
| 189 |
+
time0 = time.time()
|
| 190 |
+
self.ivf = ivf_pkg.IVF.initialize_from_state(io_helpers.load_pickle(cache_path))
|
| 191 |
+
self.metadata = {"load_time": time.time() - time0}
|
| 192 |
+
self.cache_path = None
|
| 193 |
+
else:
|
| 194 |
+
self.ivf = ivf_pkg.IVF.initialize_empty(**step_params['ivf'],
|
| 195 |
+
codebook_size=codebook.size)
|
| 196 |
+
self.metadata = {"index_time": 0}
|
| 197 |
+
self.cache_path = cache_path
|
| 198 |
+
|
| 199 |
+
@property
|
| 200 |
+
def loaded_from_cache(self):
|
| 201 |
+
"""If the contained IVF was loaded (otherwise, it is empty after initialization)"""
|
| 202 |
+
return "load_time" in self.metadata
|
| 203 |
+
|
| 204 |
+
def initialize_with_distractors(self, path):
|
| 205 |
+
"""Initialize with distractors ivf at given path"""
|
| 206 |
+
self.ivf = ivf_pkg.IVF.initialize_from_state(io_helpers.load_pickle(path))
|
| 207 |
+
self.ivf.imid_offset = self.ivf.n_images
|
| 208 |
+
|
| 209 |
+
def add(self, *columns, progress=None):
|
| 210 |
+
"""Add descriptors and cooresponding image ids to the IVF
|
| 211 |
+
|
| 212 |
+
:param np.ndarray vecs: 2D array of local descriptors
|
| 213 |
+
:param np.ndarray imids: 1D array of image ids
|
| 214 |
+
:param bool progress: step at which update progress printing (None to disable)
|
| 215 |
+
"""
|
| 216 |
+
time0 = time.time()
|
| 217 |
+
quantized = self.codebook.quantize(*columns, **self.step_params["quantize"])
|
| 218 |
+
if progress:
|
| 219 |
+
print(">> Descriptors quantized")
|
| 220 |
+
aggregated = self.kernel.aggregate(*quantized, **self.step_params["aggregate"], progress=progress)
|
| 221 |
+
self.ivf.add(*aggregated, progress=200*progress if progress else None)
|
| 222 |
+
self.metadata['index_time'] += time.time() - time0
|
| 223 |
+
|
| 224 |
+
def save(self):
|
| 225 |
+
"""Save to cache path if defined
|
| 226 |
+
|
| 227 |
+
:return: dict metadata with ivf stats
|
| 228 |
+
"""
|
| 229 |
+
if self.cache_path:
|
| 230 |
+
io_helpers.save_pickle(self.cache_path, self.ivf.state_dict())
|
| 231 |
+
|
| 232 |
+
return {**self.metadata, "ivf_stats": self.ivf.stats}
|
asmk-src/asmk/codebook.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Codebook implementations for quantization of features into visual words"""
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
import warnings
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Codebook:
|
| 9 |
+
"""Codebook of a fixed size for feature quantization"""
|
| 10 |
+
|
| 11 |
+
def __init__(self, index_factory, *, size):
|
| 12 |
+
self.params = {
|
| 13 |
+
"size": size,
|
| 14 |
+
}
|
| 15 |
+
if isinstance(size, str) and size[-1] in "kM":
|
| 16 |
+
size = int(size[:-1]) * {"k": 1024, "M": 1024**2}[size[-1]]
|
| 17 |
+
self.size = size
|
| 18 |
+
assert isinstance(self.size, int), self.size
|
| 19 |
+
|
| 20 |
+
self.index_factory = index_factory
|
| 21 |
+
self.search_index = None
|
| 22 |
+
self.centroids = None
|
| 23 |
+
|
| 24 |
+
#
|
| 25 |
+
# Create index
|
| 26 |
+
#
|
| 27 |
+
|
| 28 |
+
def index(self, centroids=None):
|
| 29 |
+
"""Index either provided or stored centroids (when centroids=None). Return a dictionary
|
| 30 |
+
with 'index' key where value is how long it took to index the centroids."""
|
| 31 |
+
if centroids is not None:
|
| 32 |
+
assert self.size == centroids.shape[0], (self.size, centroids.shape[0])
|
| 33 |
+
self.centroids = centroids
|
| 34 |
+
time0 = time.time()
|
| 35 |
+
self.search_index = self.index_factory.create_index(self.centroids)
|
| 36 |
+
return {"index_time": time.time() - time0}
|
| 37 |
+
|
| 38 |
+
def train(self, des):
|
| 39 |
+
"""Cluster descriptors and index resulting cluster centers. Return a dictionary with
|
| 40 |
+
'cluster', 'index' and 'train' keys where the value is how long it took to cluster,
|
| 41 |
+
index or train (sum of all)."""
|
| 42 |
+
time0 = time.time()
|
| 43 |
+
centroids = self.index_factory.cluster(des, self.size)
|
| 44 |
+
time_taken = time.time() - time0
|
| 45 |
+
meta = self.index(centroids)
|
| 46 |
+
return {**meta, "cluster_time": time_taken, "train_time": sum(meta.values()) + time_taken}
|
| 47 |
+
|
| 48 |
+
#
|
| 49 |
+
# Search in index
|
| 50 |
+
#
|
| 51 |
+
|
| 52 |
+
def quantize(self, des, *cols, multiple_assignment):
|
| 53 |
+
"""Quantize given descriptors. Additional cols can be given, and this function will make
|
| 54 |
+
sure, that they still correspond to returned descriptors. Multiple assignment can be
|
| 55 |
+
applied when multiple_assignment > 1."""
|
| 56 |
+
_, centroid_ids = self.search_index.search(np.ascontiguousarray(des, dtype=np.float32),
|
| 57 |
+
multiple_assignment)
|
| 58 |
+
|
| 59 |
+
return (des, centroid_ids) + cols
|
| 60 |
+
|
| 61 |
+
#
|
| 62 |
+
# Load and save
|
| 63 |
+
#
|
| 64 |
+
|
| 65 |
+
def state_dict(self):
|
| 66 |
+
"""Return state dict which is a checkpoint of current state for future recovery"""
|
| 67 |
+
if self.centroids is None:
|
| 68 |
+
warnings.warn("Returning state_dict when codebook is not indexed (meaningless)")
|
| 69 |
+
|
| 70 |
+
return {
|
| 71 |
+
"type": self.__class__.__name__,
|
| 72 |
+
"params": self.params,
|
| 73 |
+
"state": {
|
| 74 |
+
"centroids": self.centroids,
|
| 75 |
+
}
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
@classmethod
|
| 79 |
+
def initialize_from_state(cls, state, index_factory):
|
| 80 |
+
"""Initialize from a previously stored state_dict given an index factory"""
|
| 81 |
+
assert state["type"] == cls.__name__
|
| 82 |
+
codebook = cls(**state["params"], index_factory=index_factory)
|
| 83 |
+
if state["state"]["centroids"] is not None:
|
| 84 |
+
codebook.index(state["state"]["centroids"])
|
| 85 |
+
return codebook
|
asmk-src/asmk/functional.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Mathematical functions operating on arrays"""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def normalize_vec_l2(vecs):
|
| 7 |
+
"""Perform l2 normalization on each vector in a given matrix (axis 1)"""
|
| 8 |
+
norm = np.linalg.norm(vecs, ord=2, axis=1, keepdims=True) + 1e-6
|
| 9 |
+
return vecs / norm
|
| 10 |
+
|
| 11 |
+
def asmk_kernel(sim, image_ids, *, alpha, similarity_threshold):
|
| 12 |
+
"""Compute scores for visual words"""
|
| 13 |
+
mask = (sim>=similarity_threshold)
|
| 14 |
+
sim = np.power(sim[mask], alpha) # monomial kernel
|
| 15 |
+
return image_ids[mask], sim
|
asmk-src/asmk/index.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Index factories for efficient search (clustering)"""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import faiss
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class FaissL2Index:
|
| 8 |
+
"""Faiss factory for indexes on cpu"""
|
| 9 |
+
|
| 10 |
+
@staticmethod
|
| 11 |
+
def _faiss_index_flat(dim):
|
| 12 |
+
"""Return initialized faiss.IndexFlatL2"""
|
| 13 |
+
return faiss.IndexFlatL2(dim)
|
| 14 |
+
|
| 15 |
+
def cluster(self, points, k, **index_kwargs):
|
| 16 |
+
"""Clustering given points into k clusters"""
|
| 17 |
+
index = self._faiss_index_flat(points.shape[1], **index_kwargs)
|
| 18 |
+
clus = faiss.Clustering(points.shape[1], k)
|
| 19 |
+
clus.verbose = False
|
| 20 |
+
clus.niter = 10
|
| 21 |
+
clus.train(np.ascontiguousarray(points, dtype=np.float32), index)
|
| 22 |
+
return faiss.vector_float_to_array(clus.centroids).reshape(clus.k, clus.d)
|
| 23 |
+
|
| 24 |
+
def create_index(self, points, **index_kwargs):
|
| 25 |
+
"""Return faiss index with given points"""
|
| 26 |
+
index = self._faiss_index_flat(points.shape[1], **index_kwargs)
|
| 27 |
+
index.add(points)
|
| 28 |
+
return index
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class FaissGpuL2Index(FaissL2Index):
|
| 32 |
+
"""Faiss factory for indexes on gpu"""
|
| 33 |
+
|
| 34 |
+
def __init__(self, gpu_id):
|
| 35 |
+
super().__init__()
|
| 36 |
+
self.gpu_id = gpu_id
|
| 37 |
+
|
| 38 |
+
def _faiss_index_flat(self, dim):
|
| 39 |
+
"""Return initialized faiss.GpuIndexFlatL2"""
|
| 40 |
+
res = faiss.StandardGpuResources()
|
| 41 |
+
flat_config = faiss.GpuIndexFlatConfig()
|
| 42 |
+
flat_config.device = self.gpu_id
|
| 43 |
+
return faiss.GpuIndexFlatL2(res, dim, flat_config)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def initialize_index(gpu_id):
|
| 47 |
+
"""Return either gpu faiss factory or cpu faiss factory (gpu_id is None)"""
|
| 48 |
+
if gpu_id is not None:
|
| 49 |
+
return FaissGpuL2Index(gpu_id)
|
| 50 |
+
|
| 51 |
+
return FaissL2Index()
|
asmk-src/asmk/inverted_file.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inverted file implementations for efficient search in a set of visual words"""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from . import io_helpers
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class IVF:
    """Inverted File for efficient feature indexation with idf support. Can be updated."""

    def __init__(self, norm_factor, n_images, ivf_vecs, ivf_image_ids, counts, idf, imid_offset, *, use_idf):
        # Only user-facing parameters go to self.params (serialized by state_dict())
        self.params = {
            "use_idf": use_idf,
        }

        self.norm_factor = norm_factor      # per-image normalization accumulator (len >= n_images)
        self.n_images = n_images            # number of images indexed so far
        self.ivf_vecs = ivf_vecs            # per-visual-word array of aggregated descriptors (None = empty posting list)
        self.ivf_image_ids = ivf_image_ids  # per-visual-word array of image ids, parallel to ivf_vecs
        self.counts = counts                # number of valid entries per visual word (arrays over-allocate)
        self.idf = idf                      # per-visual-word idf weight (stays all-ones when use_idf is False)
        self.imid_offset = imid_offset      # offset added to incoming image ids (supports ivf concatenation)


    @classmethod
    def initialize_empty(cls, *, codebook_size, **params):
        """Return an empty IVF object given codebook params (size) and IVF params."""
        # One (initially empty) posting list per visual word
        ivf_vecs = [None for _ in range(codebook_size)]
        ivf_image_ids = [None for _ in range(codebook_size)]
        counts = np.zeros(codebook_size, dtype=np.int32)
        idf = np.ones(counts.shape, dtype=np.float32)

        return cls(**params, norm_factor=[], n_images=0, ivf_vecs=ivf_vecs,
                   ivf_image_ids=ivf_image_ids, counts=counts, idf=idf, imid_offset=0)

    #
    # Index and search
    #

    @staticmethod
    def _append_to_np_array(arr, size, item):
        """Store item at logical index `size` in arr, growing the backing array
        geometrically when full. Returns the (possibly reallocated) array; the
        caller must keep the returned reference and track the logical size
        itself (see self.counts)."""
        initial_size, increase_ratio = 10, 1.5

        if arr is None:
            # Initialization
            arr = np.zeros((initial_size,) + item.shape, dtype=item.dtype)
        elif size >= arr.shape[0]:
            # Extension
            new_size = int(np.ceil(arr.shape[0] * increase_ratio))
            arr = np.resize(arr, (new_size,) + arr.shape[1:])

        arr[size] = item
        return arr


    def add(self, des, word_ids, image_ids, *, progress=None):
        """Add descriptors with corresponding visual word ids and image ids to this ivf"""
        # NOTE(review): += mutates the caller's image_ids array in place — confirm intended
        image_ids += self.imid_offset
        min_imid, max_imid = image_ids.min(), image_ids.max()
        assert min_imid >= self.n_images  # The next chunk must be consecutive

        # Extend the per-image normalization accumulator to cover new image ids
        norm_append = np.zeros(max_imid + 1 - len(self.norm_factor))
        self.norm_factor = np.concatenate((self.norm_factor, norm_append))
        self.n_images = np.max((self.n_images, max_imid + 1))

        for i, word in io_helpers.progress(enumerate(word_ids), size=len(word_ids),
                                           frequency=progress, header="Index"):
            self.ivf_vecs[word] = self._append_to_np_array(self.ivf_vecs[word], self.counts[word],
                                                           des[i])
            self.ivf_image_ids[word] = self._append_to_np_array(self.ivf_image_ids[word],
                                                                self.counts[word], image_ids[i])
            self.counts[word] += 1
            self.norm_factor[image_ids[i]] += 1

            if self.params["use_idf"]:
                # Squared log idf, refreshed as posting lists grow
                self.idf[word] = np.log(self.n_images / self.counts[word])**2

        if self.params["use_idf"]:
            # Re-compute norm_factor to use idf
            self.norm_factor[:] = 0
            for word, imids in enumerate(self.ivf_image_ids):
                if imids is not None:
                    for imid in imids[:self.counts[word]]:
                        self.norm_factor[imid] += self.idf[word]

    def search(self, des, word_ids, *, similarity_func, topk):
        """Search in this ivf with given descriptors and corresponding visual word ids. Return
        similarity computed by provided function downweighted by idf and accumulated for all
        visual words. Return topk results per query."""
        scores = np.zeros(self.n_images)
        q_norm_factor = 0

        for qvec, word in zip(des, word_ids):
            q_norm_factor += self.idf[word]
            if self.ivf_image_ids[word] is None:
                # Empty visual word
                continue

            # similarity_func returns matched image ids and their similarities
            image_ids, sim = similarity_func(qvec, self.ivf_vecs[word][:self.counts[word]],
                                             self.ivf_image_ids[word][:self.counts[word]])

            sim *= self.idf[word]  # apply idf
            sim /= np.sqrt(self.norm_factor[image_ids])  # normalize
            scores[image_ids] += sim

        # Normalize by the query side as well, then rank descending
        scores = scores / np.sqrt(q_norm_factor)
        ranks = np.argsort(-scores)[:topk]
        return ranks - self.imid_offset, scores[ranks]

    #
    # Load, save and stats
    #

    @property
    def stats(self):
        """Return a shallow dictionary with stats of the ivf"""
        sum_counts = self.counts.sum()
        # Imbalance factor: 1.0 for perfectly balanced posting lists; `or 1` guards empty ivf
        imbalance_factor = self.counts.shape[0] * np.power(self.counts, 2).sum() / (sum_counts**2 or 1)
        return {
            "images": self.n_images,
            "vectors_per_image": sum_counts / (self.n_images or 1),
            "mean_entries_per_vw": self.counts.mean(),
            "empty_vw": sum(1 for x in self.counts if x == 0),
            "min_entries_per_vw": self.counts.min(),
            "max_entries_per_vw": self.counts.max(),
            "std_of_entries_per_vw": self.counts.std(),
            "imbalance_factor_of_vw": imbalance_factor,
        }


    def state_dict(self):
        """Return state dict which is a checkpoint of current state for future recovery"""
        return {
            "type": self.__class__.__name__,
            "params": self.params,
            "state": {
                "norm_factor": self.norm_factor,
                "n_images": self.n_images,
                "ivf_vecs": self.ivf_vecs,
                "ivf_image_ids": self.ivf_image_ids,
                "counts": self.counts,
                "idf": self.idf,
                "imid_offset": self.imid_offset,
            }
        }

    @classmethod
    def initialize_from_state(cls, state):
        """Initialize from a previously stored state_dict given an index factory"""
        assert state["type"] == cls.__name__
        if "imid_offset" not in state['state']:
            # For backwards compatibility
            state['state']['imid_offset'] = 0
        return cls(**state["params"], **state["state"])
|
asmk-src/asmk/io_helpers.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper functions related to io"""
|
| 2 |
+
|
| 3 |
+
import os.path
|
| 4 |
+
import time
|
| 5 |
+
import sys
|
| 6 |
+
import logging
|
| 7 |
+
import pickle
|
| 8 |
+
import urllib.request
|
| 9 |
+
from io import StringIO
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import yaml
|
| 12 |
+
import numpy as np
|
| 13 |
+
|
| 14 |
+
# Params
|
| 15 |
+
|
| 16 |
+
def load_params(path):
    """Return loaded parameters from a yaml file.

    A top-level "__template__" key, if present, names another yaml file
    (path relative to this file, "~" expanded) whose values serve as defaults
    and are recursively overlaid by this file's values."""
    with open(path, "r") as handle:
        content = yaml.safe_load(handle)
    if "__template__" in content:
        # Treat template as defaults
        template_path = os.path.expanduser(content.pop("__template__"))
        template = load_params(os.path.join(os.path.dirname(path), template_path))
        content = dict_deep_overlay(template, content)
    return content
|
| 26 |
+
|
| 27 |
+
def dict_deep_overlay(defaults, params):
    """If defaults and params are both dictionaries, perform deep overlay (use params value for
    keys defined in params, otherwise use defaults value). Mutates and returns defaults
    in the dict-dict case; otherwise returns params unchanged."""
    if not (isinstance(defaults, dict) and isinstance(params, dict)):
        # At least one side is a leaf: params wins outright
        return params

    for key, value in params.items():
        defaults[key] = dict_deep_overlay(defaults.get(key), value)
    return defaults
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Logging
|
| 39 |
+
|
| 40 |
+
def init_logger(log_path):
    """Return a logger instance which logs to stdout and, if log_path is not None, also to a file.

    The returned logger is the process-wide singleton named "ASMK". Handlers
    attached by any previous call are removed first, so calling this function
    repeatedly (e.g. once per parameter file in a loop) does not duplicate
    every log line.
    """
    logger = logging.getLogger("ASMK")
    logger.setLevel(logging.DEBUG)

    # Reset handlers from earlier init_logger() calls (idempotence)
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    # Console output: INFO and above, short format
    stdout_handler = logging.StreamHandler()
    stdout_handler.setLevel(logging.INFO)
    stdout_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
    logger.addHandler(stdout_handler)

    if log_path:
        # File output: everything (DEBUG and above) with timestamps
        file_handler = logging.FileHandler(log_path)
        file_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger
|
| 58 |
+
|
| 59 |
+
def progress(iterable, *, size=None, frequency=1, header=""):
    """Generator that wraps an iterable and prints progress.

    Progress is printed every `frequency` items (a falsy frequency disables
    printing entirely); `size` defaults to len(iterable)."""
    if size is None:
        size = len(iterable)
    prefix = f"{header.capitalize()}: " if header else ""
    width = len(str(size))
    if frequency:
        print(f"{prefix}[{'0'.rjust(width)}/{size}]", end=" ")
        sys.stdout.flush()
    started = time.time()
    done = 0
    for element in iterable:
        yield element
        done += 1
        if frequency and (done % frequency == 0 or done == size):
            per_item = (time.time() - started) / done
            # Overwrite the line in place with elapsed / estimated-total minutes
            print(f"\r{prefix}[{str(done).rjust(width)}/{size}] " \
                  f"elapsed {int(per_item*done/60):02d}m/{int(per_item*size/60):02d}m", end=" ")
            sys.stdout.flush()
    if frequency:
        print()
|
| 79 |
+
|
| 80 |
+
def capture_stdout(func, logger):
    """Run func() with stdout captured, then emit each captured line via logger.info.

    sys.stdout is restored in a finally block, so an exception raised by func
    no longer leaves the process with a redirected stdout (the original
    implementation only restored it on the success path).
    """
    buffer = StringIO()
    original_stdout = sys.stdout
    sys.stdout = buffer
    try:
        func()
    finally:
        sys.stdout = original_stdout
    for line in buffer.getvalue().strip().split("\n"):
        logger.info(line)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# Load and save state dicts
|
| 90 |
+
|
| 91 |
+
def load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, mode="rb") as source:
        data = pickle.load(source)
    return data
|
| 95 |
+
|
| 96 |
+
def save_pickle(path, data):
    """Serialize *data* into a pickle file at *path* (overwrites existing file)."""
    with open(path, mode="wb") as destination:
        pickle.dump(data, destination)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# Download
|
| 103 |
+
|
| 104 |
+
def download_files(names, root_path, base_url, logfunc=None):
    """Download file names from given url to given directory path. If logfunc given, use it to log
    status.

    Files already present under root_path are skipped (no re-download, no
    freshness check). Parent directories are created as needed.
    """
    root_path = Path(root_path)
    for name in names:
        path = root_path / name
        if path.exists():
            continue
        if logfunc:
            logfunc(f"Downloading file '{name}'")
        path.parent.mkdir(parents=True, exist_ok=True)
        # `name` may contain subdirectories (e.g. "dataset/gnd_dataset.pkl")
        urllib.request.urlretrieve(base_url + name, path)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Iteration
|
| 119 |
+
|
| 120 |
+
def slice_unique(ids):
    """Generate slices that mark a sequence of identical values in a given array of ids. The
    sequence must be uninterrupted (compact)."""
    values, counts = np.unique(ids, return_counts=True)
    start = 0
    for value, count in zip(values, counts):
        stop = start + count
        block = slice(start, stop)
        # Input must be grouped: every occurrence of a value is contiguous
        assert (ids[block] == value).all()
        yield value, block
        start = stop
|
asmk-src/asmk/kernel.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Kernel functionality implementation - aggregation and similarity computation"""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from . import functional, hamming, io_helpers
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ASMKKernel:
    """Kernel for ASMK with the option of binarization."""

    # String aliases accepted for the `binary` parameter
    binary_shortcuts = {"bin": True, "nobin": False}

    def __init__(self, codebook, *, binary):
        self.params = {
            "binary": binary,
        }
        # Resolve a possible "bin"/"nobin" shortcut to a bool; reject anything else
        self.binary = self.binary_shortcuts.get(binary, binary)
        assert self.binary in self.binary_shortcuts.values()

        self.codebook = codebook

    #
    # Aggregation
    #

    def aggregate_image(self, des, word_ids):
        """Aggregate descriptors (with corresponding visual word ids) for a single image.

        Per visual word, residuals against the centroid are summed, then either
        binarized+bit-packed (binary mode) or L2-normalized."""
        unique_ids = np.unique(word_ids)
        ades = np.empty((unique_ids.shape[0], des.shape[1]), dtype=np.float32)

        for i, word in enumerate(unique_ids):
            # NOTE(review): (word_ids==word).any(axis=1) implies word_ids is 2D
            # (one row per descriptor, multiple assignments per row) — confirm
            ades[i] = (des[(word_ids==word).any(axis=1)] - self.codebook.centroids[word]).sum(0)

        if self.binary:
            ades = hamming.binarize_and_pack_2D(ades)
        else:
            ades = functional.normalize_vec_l2(ades)

        return ades, unique_ids

    def aggregate(self, des, word_ids, image_ids, *, progress=None, **kwargs):
        """Aggregate descriptors with corresponding visual word ids for corresponding image ids.

        image_ids must be compact (all rows of an image contiguous), as required
        by io_helpers.slice_unique."""
        acc = []
        slices = list(io_helpers.slice_unique(image_ids))
        for imid, seq in io_helpers.progress(slices, frequency=progress, header="Aggregate"):
            ades, ids = self.aggregate_image(des[seq], word_ids[seq], **kwargs)
            acc.append((ades, ids, np.full(ids.shape[0], imid)))

        # Flatten per-image results back into parallel arrays
        agg_des, agg_words, agg_imids = zip(*acc)
        return np.vstack(agg_des), np.hstack(agg_words), np.hstack(agg_imids)

    #
    # Similarity
    #

    def similarity(self, qvec, vecs, image_ids, *, alpha, similarity_threshold):
        """Compute similarity between given query vector and database feature vectors with their
        corresponding image ids. Alpha is the similarity exponent after the similarity
        threshold is applied."""
        # Compute similarity with vw residuals for all other images
        if self.binary:
            norm_hdist = hamming.hamming_cdist_packed(qvec.reshape(1, -1), vecs)
            sim = -2*norm_hdist.squeeze(0) + 1  # normalized hamming dist -> similarity in [-1, 1]
        else:
            # Vectors are L2-normalized, so the dot product is a cosine similarity
            sim = np.matmul(vecs, qvec)

        return functional.asmk_kernel(sim, image_ids, alpha=alpha,
                                      similarity_threshold=similarity_threshold)

    #
    # Load and save
    #

    def state_dict(self):
        """Return state dict which is a checkpoint of current state for future recovery"""
        return {
            "type": self.__class__.__name__,
            "params": self.params,
        }

    @classmethod
    def initialize_from_state(cls, state, codebook):
        """Initialize from a previously stored state_dict given a codebook"""
        assert state["type"] == cls.__name__
        return cls(**state["params"], codebook=codebook)
|
asmk-src/cython/build.sh
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
cythonize *.pyx
|
asmk-src/cython/hamming.c
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
asmk-src/cython/hamming.pyx
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# cython: language_level=3, boundscheck=False, wraparound=False
"""Binarization/bit-packing of float arrays into uint32 and (normalized)
hamming distances over the packed representation."""

import numpy as np
cimport cython
from libc.math cimport ceil


# Masks for the classic parallel popcount (partial sums over 1/2/4/8/16-bit groups)
cdef unsigned int BIT_MASK_1 = 0x55555555
cdef unsigned int BIT_MASK_2 = 0x33333333
cdef unsigned int BIT_MASK_4 = 0x0f0f0f0f
cdef unsigned int BIT_MASK_8 = 0x00ff00ff
cdef unsigned int BIT_MASK_16 = 0x0000ffff


cdef int c_count_bits(unsigned int n) nogil:
    # Population count (number of set bits) via pairwise partial sums
    n = (n & BIT_MASK_1) + ((n >> 1) & BIT_MASK_1)
    n = (n & BIT_MASK_2) + ((n >> 2) & BIT_MASK_2)
    n = (n & BIT_MASK_4) + ((n >> 4) & BIT_MASK_4)
    n = (n & BIT_MASK_8) + ((n >> 8) & BIT_MASK_8)
    n = (n & BIT_MASK_16) + ((n >> 16) & BIT_MASK_16)
    return n


cdef unsigned int c_binarize_and_pack_uint32(float[::1] arr, Py_ssize_t length, int threshold) nogil:
    # Pack the first `length` elements (length <= 32) as bits, MSB first
    cdef unsigned int tmp = 0
    cdef Py_ssize_t i

    for i in range(length):
        tmp = (tmp << 1) + (arr[i] > threshold)
    return tmp


@cython.cdivision(True)
cdef float c_hamming_dist_uint32_arr(unsigned int[::1] n1, unsigned int[::1] n2, float normalization) nogil:
    cdef Py_ssize_t length = n1.shape[0]
    if normalization == 0:
        # Default normalization: total number of packed bits
        normalization = length * 32

    # NOTE(review): loop variable 'i' is not explicitly cdef-declared here
    # (unlike in c_binarize_and_pack_uint32); Cython's type inference is relied
    # upon to keep this loop nogil-safe — confirm it compiles as a C loop
    cdef int sum = 0
    for i in range(length):
        sum += c_count_bits(n1[i] ^ n2[i])
    return sum / normalization


#
# Python API
#

def binarize_and_pack(float[::1] arr, int threshold = 0):
    """
    binarize_and_pack(float[::1] arr, int threshold = 0)

    Binarizes given 1D numpy array by 'arr = arr > threshold' and packs its elements into bits
    in uint32 array. Returns a 1D uint32 array where each element corresponds to a started set
    of 32 bits.

    >> binarize_and_pack((np.random.rand(10) - 0.5).astype(np.float32))
    array([2206203904], dtype=uint32)
    """
    cdef Py_ssize_t dim_orig = arr.shape[0]
    cdef Py_ssize_t dim = int(ceil(dim_orig / 32.0))
    result = np.zeros(dim, dtype=np.uint32)
    cdef unsigned int[::1] result_view = result

    cdef Py_ssize_t i, offset
    cdef unsigned int tmp
    offset = 0
    # Full 32-bit words first
    for i in range(dim-1):
        result_view[i] = c_binarize_and_pack_uint32(arr[offset:], 32, threshold)
        offset += 32

    # Last iteration
    # NOTE(review): an empty input gives dim == 0, so result_view[dim-1] writes
    # out of bounds (unchecked here due to boundscheck/wraparound=False) —
    # confirm callers never pass zero-length arrays
    tmp = c_binarize_and_pack_uint32(arr[offset:], dim_orig - offset, threshold)
    # Left-align the remaining bits within the final uint32
    result_view[dim-1] = tmp << (offset + 32 - dim_orig)

    return result


def binarize_and_pack_2D(float[:,::1] arr, int threshold = 0):
    """
    binarize_and_pack_2D(float[:,::1] arr, int threshold = 0)

    Binarizes given 2D numpy array by 'arr = arr > threshold' and packs its elements into bits
    in uint32 array. Returns a 2D uint32 array where each row corresponds to row in the original
    array and each element to a started set of 32 bits.

    >> binarize_and_pack_2D((np.random.rand(2, 10) - 0.5).astype(np.float32))
    array([[1786773504]
           [1509949440]], dtype=uint32)
    """
    cdef Py_ssize_t dim0 = arr.shape[0]
    cdef Py_ssize_t dim1_orig = arr.shape[1]
    cdef Py_ssize_t dim1 = int(ceil(dim1_orig / 32.0))
    result = np.zeros((dim0, dim1), dtype=np.uint32)
    cdef unsigned int[:, ::1] result_view = result

    cdef Py_ssize_t i, j, offset
    cdef unsigned int tmp
    for i in range(dim0):
        offset = 0
        # Full 32-bit words first
        for j in range(dim1-1):
            result_view[i,j] = c_binarize_and_pack_uint32(arr[i][offset:], 32, threshold)
            offset += 32

        # Last iteration
        # NOTE(review): zero columns would make dim1 == 0 and index out of
        # bounds, as in binarize_and_pack — confirm callers never pass that
        tmp = c_binarize_and_pack_uint32(arr[i][offset:], dim1_orig - offset, threshold)
        result_view[i,dim1-1] = tmp << (offset + 32 - dim1_orig)

    return result


def hamming_dist_packed(unsigned int[::1] n1, unsigned int[::1] n2, float normalization = 0):
    """
    hamming_dist_packed(unsigned int[::1] n1, unsigned int[::1] n2, float normalization = 0)

    Computes a hamming distance between two bit arrays packed into uint32 arrays and divides
    it by normalization, if provided, otherwise by the number of bits in an array (always
    a multiplication of 32).

    >> hamming_dist_packed(np.array([3], dtype=np.uint32), np.array([1], dtype=np.uint32), 2)
    0.5
    """
    assert n1 is not None and n2 is not None
    assert n1.shape[0] == n2.shape[0]
    return c_hamming_dist_uint32_arr(n1, n2, normalization)


def hamming_cdist_packed(unsigned int[:,::1] arr1, unsigned int[:,::1] arr2, float normalization = 0):
    """
    hamming_cdist_packed(unsigned int[:,::1] arr1, unsigned int[:,::1] arr2, float normalization = 0)

    Computes a hamming distance between two sets of bit arrays packed into uint32 using
    hamming_dist_packed. Returns an array of size (arr1.shape[0], arr2.shape[0]).

    >> hamming_cdist_packed(np.array([[3], [1]], dtype=np.uint32), np.array([[1], [2]], dtype=np.uint32), 2)
    array([[0.5, 0.5],
           [0. , 1. ]], dtype=float32)
    """
    assert arr1 is not None and arr2 is not None
    assert arr1.shape[1] == arr2.shape[1]

    cdef Py_ssize_t dim0 = arr1.shape[0]
    cdef Py_ssize_t dim1 = arr2.shape[0]
    result = np.zeros((dim0, dim1), dtype=np.float32)
    cdef float[:, ::1] result_view = result

    # All-pairs distances; each pairwise call releases the GIL internally
    cdef Py_ssize_t i, j
    for i in range(dim0):
        for j in range(dim1):
            result_view[i, j] = c_hamming_dist_uint32_arr(arr1[i], arr2[j], normalization)

    return result
|
asmk-src/examples/demo_how.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import os.path
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
import argparse
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
# Add package root to pythonpath
|
| 10 |
+
sys.path.append(os.path.realpath(f"{__file__}/../../"))
|
| 11 |
+
|
| 12 |
+
from cirtorch.datasets.testdataset import configdataset
|
| 13 |
+
from cirtorch.utils.evaluate import compute_map_and_print
|
| 14 |
+
from asmk import io_helpers, ASMKMethod
|
| 15 |
+
|
| 16 |
+
FEATURES_URL = "http://ptak.felk.cvut.cz/personal/toliageo/share/how/features/"
|
| 17 |
+
DATASETS_URL = "http://cmp.felk.cvut.cz/cnnimageretrieval/data/test/"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def initialize(params, demo_params, globals, logger):
    """Download necessary files and initialize structures.

    Downloads (a) per-dataset evaluation features plus codebook-training
    features, and (b) the ground-truth pickles of the test datasets, then
    returns an untrained ASMKMethod built from params."""
    logger.info(f"ECCV20 demo with parameters '{globals['exp_path'].name}'")

    # Download features
    features = ["%s_%s.pkl" % (x, demo_params['eval_features']) \
                    for x in demo_params['eval_datasets']]
    features.append("%s_%s.pkl" % (demo_params['codebook_dataset'],
                                   demo_params['codebook_features']))
    io_helpers.download_files(features, globals['root_path'] / "features", FEATURES_URL,
                              logfunc=logger.info)

    # Download test datasets (ground-truth pickles only)
    pkls = ["%s/gnd_%s.pkl" % (x, x) for x in demo_params['eval_datasets']]
    io_helpers.download_files(pkls, globals['root_path'] / "test", DATASETS_URL,
                              logfunc=logger.info)

    # Initialize asmk method wrapper
    return ASMKMethod.initialize_untrained(params)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def train_codebook(asmk, demo_params, globals, logger):
    """The first step of asmk method - training the codebook.

    Loads the codebook-training descriptors, trains (or loads a cached)
    codebook, logs timing metadata, and returns the updated asmk wrapper."""
    codebook_path = f"{globals['exp_path']}/codebook.pkl"
    features_path = f"{globals['root_path']}/features/{demo_params['codebook_dataset']}_" \
                    f"{demo_params['codebook_features']}.pkl"

    desc = io_helpers.load_pickle(features_path)
    logger.info(f"Loaded descriptors for codebook")
    # cache_path: a previously trained codebook is loaded instead of retrained
    asmk = asmk.train_codebook(desc['vecs'], cache_path=codebook_path)

    metadata = asmk.metadata['train_codebook']
    logger.debug(f"Using {metadata['index_class']} index")
    if "load_time" in metadata:
        logger.info("Loaded pre-trained codebook")
    else:
        logger.info(f"Codebook trained in {metadata['train_time']:.1f}s")
        logger.debug(f"Vectors for codebook clustered in {metadata['cluster_time']:.1f}s " \
                     f"and indexed in {metadata['index_time']:.1f}s")
    return asmk
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def build_ivf(asmk, dataset, desc, globals, logger):
    """The second step of asmk method - building the ivf.

    Indexes the database descriptors (or loads a cached per-dataset ivf) and
    returns the dataset-specific asmk wrapper."""
    ivf_path = f"{globals['exp_path']}/ivf_{dataset}.pkl"

    asmk = asmk.build_ivf(desc['vecs'], desc['imids'], cache_path=ivf_path)

    metadata = asmk.metadata['build_ivf']
    if "load_time" in metadata:
        logger.info("Loaded indexed ivf")
    else:
        logger.info(f"Indexed descriptors in {metadata['index_time']:.2f}s")

    logger.debug(f"IVF stats: {metadata['ivf_stats']}")
    return asmk
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def query_ivf(asmk, dataset, desc, globals, logger):
    """The last step of asmk method - querying the ivf.

    Runs all queries, then evaluates mAP against the dataset ground truth,
    piping compute_map_and_print's stdout through the logger."""
    metadata, _images, ranks, _scores = asmk.query_ivf(desc['qvecs'], desc['qimids'])
    logger.debug(f"Average query time (quant+aggr+search) is {metadata['query_avg_time']:.3f}s")
    gnd = configdataset(dataset, f"{globals['root_path']}/test/")['gnd']
    # NOTE(review): ranks is transposed for compute_map_and_print — presumably
    # (queries, db) -> (db, queries); confirm against the cirtorch API
    io_helpers.capture_stdout(lambda: compute_map_and_print(dataset, ranks.T, gnd), logger)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def demo_how(params, globals, logger):
    """Demo where asmk is applied to the HOW descriptors from eccv'20 paper, replicating reported
    results. Params is a dictionary with parameters for each step."""
    # Split demo-specific params off; the rest configures the ASMK method itself
    demo_params = params.pop("demo_how")
    asmk = initialize(params, demo_params, globals, logger)

    # Codebook is trained once and shared across evaluated datasets
    asmk = train_codebook(asmk, demo_params, globals, logger)

    # Create db and evaluate datasets
    for dataset in demo_params['eval_datasets']:
        desc = io_helpers.load_pickle(f"{globals['root_path']}/features/{dataset}_" \
                                      f"{demo_params['eval_features']}.pkl")
        logger.info(f"Loaded DB and query descriptors for {dataset}")

        asmk_dataset = build_ivf(asmk, dataset, desc, globals, logger)

        query_ivf(asmk_dataset, dataset, desc, globals, logger)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def main(args):
    """Argument parsing and parameter preparation for the demo.

    Each positional argument is either a path to a .yml parameter file or the
    bare name of a bundled file under examples/params/. Runs the demo once per
    parameter file."""
    # Arguments
    parser = argparse.ArgumentParser(description="ASMK demo replicating results for HOW " \
                                                 "descriptors from ECCV 2020.")
    parser.add_argument('parameters', nargs='+', type=str,
                        help="Relative path to a yaml file that contains parameters.")
    args = parser.parse_args(args)

    package_root = Path(__file__).resolve().parent.parent
    for parameters_path in args.parameters:
        # Load yaml params
        if not parameters_path.endswith(".yml"):
            # Bare name: resolve against the bundled parameter files
            parameters_path = package_root / "examples" / ("params/%s.yml" % parameters_path)
        params = io_helpers.load_params(parameters_path)

        # Resolve data folders
        # NOTE(review): the local name `globals` shadows the builtin — harmless
        # here, but renaming (e.g. to `paths`) would be safer
        globals = {}
        globals["root_path"] = (package_root / params['demo_how']['data_folder'])
        globals["root_path"].mkdir(parents=True, exist_ok=True)
        # Experiment name = parameter file name without the .yml suffix
        exp_name = Path(parameters_path).name[:-len(".yml")]
        globals["exp_path"] = (package_root / params['demo_how']['exp_folder']) / exp_name
        globals["exp_path"].mkdir(parents=True, exist_ok=True)

        # Setup logging
        logger = io_helpers.init_logger(globals["exp_path"] / "output.log")

        # Run demo
        demo_how(params, globals, logger)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
if __name__ == "__main__":
    # Script entry point: forward CLI arguments (without the program name)
    main(sys.argv[1:])
|
asmk-src/examples/params/_eccv20_how.yml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Common parameters for HOW descriptors
|
| 2 |
+
|
| 3 |
+
demo_how:
|
| 4 |
+
data_folder: data # Data path relative to the package root (can be absolute path)
|
| 5 |
+
exp_folder: data/experiments # Experiment path relative to the package root (can be absolute path)
|
| 6 |
+
eval_datasets: [roxford5k, rparis6k] # Datasets to evaluate on
|
| 7 |
+
eval_features: null # Name of the local features for evaluation (will be downloaded for each dataset)
|
| 8 |
+
codebook_dataset: sfm120k_subset20k # Dataset to train the codebook on
|
| 9 |
+
codebook_features: null # Name of the local features to train the codebook on (will be downloaded for codebook_dataset)
|
| 10 |
+
|
| 11 |
+
# General properties
|
| 12 |
+
|
| 13 |
+
index:
|
| 14 |
+
gpu_id: 0 # Gpu id to use, None to run on cpu
|
| 15 |
+
|
| 16 |
+
# Steps
|
| 17 |
+
|
| 18 |
+
train_codebook:
|
| 19 |
+
codebook:
|
| 20 |
+
size: "64k" # Number of visual words (i.e. clusters of local features) in the codebook
|
| 21 |
+
|
| 22 |
+
build_ivf:
|
| 23 |
+
kernel:
|
| 24 |
+
binary: True # Binarization option in the asmk
|
| 25 |
+
ivf:
|
| 26 |
+
use_idf: False # IDF weighting in the inverted file
|
| 27 |
+
|
| 28 |
+
quantize:
|
| 29 |
+
multiple_assignment: 1 # Number of assignments for the db vectors
|
| 30 |
+
aggregate: {}
|
| 31 |
+
|
| 32 |
+
query_ivf:
|
| 33 |
+
quantize:
|
| 34 |
+
multiple_assignment: 5 # Number of assignments for the query vectors
|
| 35 |
+
aggregate: {}
|
| 36 |
+
search:
|
| 37 |
+
topk: null # Limit the number of results (db images) returned for each query (null means unlimited)
|
| 38 |
+
similarity:
|
| 39 |
+
similarity_threshold: 0.0 # Feature similarity threshold
|
| 40 |
+
alpha: 3.0 # Feature similarity exponent
|
asmk-src/examples/params/eccv20_how_r18_1000.yml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Demo scenario for HOW descriptors with Resnet 18 backbone
|
| 2 |
+
|
| 3 |
+
__template__: _eccv20_how.yml
|
| 4 |
+
|
| 5 |
+
demo_how:
|
| 6 |
+
eval_features: how_r18_1000
|
| 7 |
+
codebook_features: how_r18_1000
|
asmk-src/examples/params/eccv20_how_r50-_1000.yml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Demo scenario for HOW descriptors with Resnet 50 backbone, without the last block
|
| 2 |
+
|
| 3 |
+
__template__: _eccv20_how.yml
|
| 4 |
+
|
| 5 |
+
demo_how:
|
| 6 |
+
eval_features: how_r50-_1000
|
| 7 |
+
codebook_features: how_r50-_1000
|
asmk-src/examples/params/eccv20_how_r50-_2000.yml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Demo scenario for HOW descriptors with Resnet 50 backbone, without the last block
|
| 2 |
+
|
| 3 |
+
__template__: _eccv20_how.yml
|
| 4 |
+
|
| 5 |
+
demo_how:
|
| 6 |
+
eval_features: how_r50-_2000
|
| 7 |
+
codebook_features: how_r50-_1000
|
asmk-src/setup.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from setuptools import setup, Extension
|
| 3 |
+
from setuptools.command.install import install
|
| 4 |
+
|
| 5 |
+
class InstallWrapper(install):
|
| 6 |
+
|
| 7 |
+
def run(self):
|
| 8 |
+
try:
|
| 9 |
+
import faiss
|
| 10 |
+
except ImportError:
|
| 11 |
+
sys.stderr.write("\nERROR: faiss package not installed (install either faiss-cpu or " \
|
| 12 |
+
"faiss-gpu before installing this package.).\n\n")
|
| 13 |
+
sys.exit(1)
|
| 14 |
+
|
| 15 |
+
install.run(self)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
setup(
|
| 19 |
+
name="asmk",
|
| 20 |
+
version="0.1",
|
| 21 |
+
description="ASMK Python implementation for ECCV'20 paper \"Learning and aggregating deep " \
|
| 22 |
+
"local descriptors for instance-level recognition\"",
|
| 23 |
+
author="Tomas Jenicek, Giorgos Tolias",
|
| 24 |
+
packages=[
|
| 25 |
+
"asmk",
|
| 26 |
+
],
|
| 27 |
+
ext_modules=[Extension("asmk.hamming", ["cython/hamming.c"])],
|
| 28 |
+
install_requires=[
|
| 29 |
+
"numpy",
|
| 30 |
+
"pyaml",
|
| 31 |
+
],
|
| 32 |
+
cmdclass={
|
| 33 |
+
"install": InstallWrapper,
|
| 34 |
+
},
|
| 35 |
+
zip_safe=True)
|
asmk-src/test/test_hamming.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests of asmk.hamming"""
|
| 2 |
+
|
| 3 |
+
import unittest
|
| 4 |
+
from asmk import hamming
|
| 5 |
+
import numpy as np
|
| 6 |
+
from scipy.spatial.distance import cdist
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestFunctions(unittest.TestCase):
|
| 10 |
+
"""Unit test of functions"""
|
| 11 |
+
|
| 12 |
+
@staticmethod
|
| 13 |
+
def _numpy_pack_uint32(arr):
|
| 14 |
+
res = np.empty((arr.shape[0], int(np.ceil(arr.shape[1] / 32))), dtype=np.uint32)
|
| 15 |
+
packed = np.packbits(arr, axis=1).astype(np.uint32)
|
| 16 |
+
packed = np.pad(packed, ((0, 0), (0, 4 - packed.shape[1] % 4)), 'constant')
|
| 17 |
+
for i in range(res.shape[1]):
|
| 18 |
+
res[:,i] = (packed[:,4*i+0] << 24) + (packed[:,4*i+1] << 16) + (packed[:,4*i+2] << 8) + packed[:,4*i+3]
|
| 19 |
+
return res
|
| 20 |
+
|
| 21 |
+
def test_binarize_and_pack(self):
|
| 22 |
+
for dim1 in range(1, 40):
|
| 23 |
+
arr = (np.random.rand(dim1) - 0.5).astype(np.float32)
|
| 24 |
+
self.assertTrue(np.allclose(self._numpy_pack_uint32(np.expand_dims(arr, 0) > 0).squeeze(),
|
| 25 |
+
hamming.binarize_and_pack(arr)))
|
| 26 |
+
|
| 27 |
+
def test_binarize_and_pack_2D(self):
|
| 28 |
+
for dim1 in range(1, 40):
|
| 29 |
+
arr = (np.random.rand(10, dim1) - 0.5).astype(np.float32)
|
| 30 |
+
self.assertTrue(np.allclose(self._numpy_pack_uint32(arr > 0), hamming.binarize_and_pack_2D(arr)))
|
| 31 |
+
|
| 32 |
+
def test_hamming_dist_packed(self):
|
| 33 |
+
for dim1 in range(100, 140):
|
| 34 |
+
arr1 = (np.random.rand(1, dim1) - 0.5).astype(np.float32)
|
| 35 |
+
arr2 = (np.random.rand(1, dim1) - 0.5).astype(np.float32)
|
| 36 |
+
res = hamming.hamming_dist_packed(hamming.binarize_and_pack_2D(arr1).squeeze(0),
|
| 37 |
+
hamming.binarize_and_pack_2D(arr2).squeeze(0), dim1)
|
| 38 |
+
if dim1 % 32 == 0:
|
| 39 |
+
# Test default behaviour
|
| 40 |
+
res = hamming.hamming_dist_packed(hamming.binarize_and_pack_2D(arr1).squeeze(0),
|
| 41 |
+
hamming.binarize_and_pack_2D(arr2).squeeze(0))
|
| 42 |
+
self.assertTrue(np.allclose(res, cdist(arr1 > 0, arr2 > 0, 'hamming').squeeze()))
|
| 43 |
+
|
| 44 |
+
def test_hamming_cdist_packed(self):
|
| 45 |
+
for dim1 in range(100, 140):
|
| 46 |
+
arr1 = (np.random.rand(10, dim1) - 0.5).astype(np.float32)
|
| 47 |
+
arr2 = (np.random.rand(10, dim1) - 0.5).astype(np.float32)
|
| 48 |
+
if dim1 % 32 == 0:
|
| 49 |
+
# Test default behaviour
|
| 50 |
+
res = hamming.hamming_cdist_packed(hamming.binarize_and_pack_2D(arr1),
|
| 51 |
+
hamming.binarize_and_pack_2D(arr2))
|
| 52 |
+
res = hamming.hamming_cdist_packed(hamming.binarize_and_pack_2D(arr1),
|
| 53 |
+
hamming.binarize_and_pack_2D(arr2), dim1)
|
| 54 |
+
self.assertTrue(np.allclose(res, cdist(arr1 > 0, arr2 > 0, 'hamming')))
|