Coverage for trimesh/caching.py: 89%
267 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-24 04:40 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-24 04:40 +0000
"""
caching.py
-----------

Functions and classes that help with tracking changes
in `numpy.ndarray` and clearing cached values based
on those changes.

You should really `pip install xxhash`:

```
In [23]: %timeit int(blake2b(d).hexdigest(), 16)
102 us +/- 684 ns per loop

In [24]: %timeit int(sha256(d).hexdigest(), 16)
142 us +/- 3.73 us

In [25]: %timeit xxh3_64_intdigest(d)
3.37 us +/- 116 ns per loop
```
"""
23import os
24import time
25from functools import wraps
26from hashlib import blake2b as _blake2b
27from hashlib import sha256 as _sha256
29import numpy as np
31from .constants import log
32from .util import is_sequence
34try:
35 from collections.abc import Mapping
36except BaseException:
37 from collections.abc import Mapping
def sha256(item) -> int:
    """
    Hash a bytes-like object with SHA-256 and return the
    digest as an integer.

    Parameters
    ------------
    item : bytes-like
      Data to be hashed

    Returns
    ------------
    hashed : int
      The digest interpreted as a big-endian unsigned integer,
      which is the same value as parsing the hex digest base 16.
    """
    digest = _sha256(item).digest()
    return int.from_bytes(digest, "big")
def hash_fallback(item):
    """
    Hash a bytes-like object with blake2b from `hashlib`, used
    when no faster hashing library could be imported.

    Parameters
    ------------
    item : bytes-like
      Data to be hashed

    Returns
    ------------
    hashed : int
      The digest interpreted as a big-endian unsigned integer,
      which is the same value as parsing the hex digest base 16.
    """
    # this hash only detects changes, it is not used for security
    hasher = _blake2b(item, usedforsecurity=False)
    return int.from_bytes(hasher.digest(), "big")
# Pick the function bound to `hash_fast`, which is used to hash
# array contents. xxhash is up to 30x faster than sha256:
# `pip install xxhash`
try:
    # newest version of algorithm
    from xxhash import xxh3_64_intdigest as hash_fast
except BaseException:
    # deliberately broad: any failure importing the optional
    # dependency should fall through to the next option
    try:
        # older version of the algorithm
        from xxhash import xxh64_intdigest as hash_fast
    except BaseException:
        # use hashlib as a fallback hashing library
        # note the space before `for`: the original pieces were
        # concatenated without one and the message ran together
        log.debug(
            "falling back to hashlib hashing: "
            "`pip install xxhash` for 50x faster cache checks"
        )
        hash_fast = hash_fallback
def tracked_array(array, dtype=None):
    """
    Convert array-like data into a `TrackedArray` so that
    changes to it can be detected.

    The data is first forced into a C-contiguous `numpy.ndarray`
    and then viewed as the `TrackedArray` subclass.

    Parameters
    ------------
    array : array-like object or None
      To be turned into a TrackedArray, `None` produces
      an empty array.
    dtype : np.dtype
      Which dtype to use for the array

    Returns
    ------------
    tracked : TrackedArray
      Contains input array data.
    """
    # `None` is treated as "no data" rather than an error
    source = [] if array is None else array
    # force a contiguous layout before taking the subclass view
    contiguous = np.ascontiguousarray(source, dtype=dtype)
    tracked = contiguous.view(TrackedArray)
    # the view should never lose contiguity
    assert tracked.flags["C_CONTIGUOUS"]

    return tracked
def cache_decorator(function):
    """
    Decorator for methods that takes the place of `@property`
    and additionally keeps the returned value in the `_cache`
    attribute of the object the method belongs to.

    Parameters
    ------------
    function : method
      This is used as a decorator:
      ```
      @cache_decorator
      def foo(self, things):
        return 'happy days'
      ```

    Returns
    ------------
    cached : property
      Property which evaluates `function` only when its
      value is not already stored in the object cache.
    """
    # the name of the wrapped function is the key in the cache
    key = function.__name__

    # `wraps` carries the docstring over to the wrapper
    @wraps(function)
    def fetch_or_compute(*args, **kwargs):
        """
        Return the stored value if there is one, otherwise
        run the function and store what it returns.
        """
        owner = args[0]
        # validate once here, then work on the raw dict so the
        # cache is not verified a second time on access
        owner._cache.verify()
        stored = owner._cache.cache
        if key in stored:
            return stored[key]

        # nothing stored so the function has to be evaluated
        result = function(*args, **kwargs)
        if owner._cache.force_immutable:
            # only objects with array flags and at least one
            # dimension are made read-only
            if hasattr(result, "flags") and len(result.shape) > 0:
                result.flags.writeable = False

        owner._cache.cache[key] = result

        return result

    # expose the wrapper as a property so cached values are
    # read like attributes instead of being called
    return property(fetch_or_compute)
class TrackedArray(np.ndarray):
    """
    Subclass of numpy.ndarray that provides hash methods
    to track changes.

    General method is to aggressively set a 'dirty' flag
    on operations which might (but don't necessarily) alter
    the array: sometimes a hash is recomputed when it did
    not need to be, but a stale hash is never returned.

    Methods
    ----------
    __hash__ : int
      Runs whichever function the module bound to `hash_fast`,
      chosen in this order: `xxh3_64, xxh64, blake2b`
    """

    def __array_finalize__(self, obj):
        """
        Sets a modified flag on every TrackedArray
        This flag will be set on every change as well as
        during copies and certain types of slicing.
        """
        self._dirty_hash = True
        # when created from another tracked array also flag the
        # source, since the two may be sharing the same memory
        if isinstance(obj, type(self)):
            obj._dirty_hash = True

    def __array_wrap__(self, out_arr, context=None, *args, **kwargs):
        """
        Return a numpy scalar if array is 0d.
        See https://github.com/numpy/numpy/issues/5819
        """
        if out_arr.ndim:
            return np.ndarray.__array_wrap__(self, out_arr, context, *args, **kwargs)
        # Match numpy's behavior and return a numpy dtype scalar
        return out_arr[()]

    @property
    def mutable(self):
        """
        Whether the array can be written to.

        Returns
        ----------
        mutable : bool
          Value of the `WRITEABLE` flag of the array.
        """
        return self.flags["WRITEABLE"]

    @mutable.setter
    def mutable(self, value):
        self.flags.writeable = value

    def __hash__(self):
        """
        Return a fast hash of the contents of the array.

        Returns
        -------------
        hash : int
          A hash of the array contents.
        """
        # reuse the stored value if nothing has flagged a change
        if not self._dirty_hash and hasattr(self, "_hashed"):
            return self._hashed

        # run a hashing function on the C-order bytes copy
        hashed = hash_fast(self.tobytes(order="C"))

        # store the value and mark it as current
        self._hashed = hashed
        self._dirty_hash = False

        return hashed

    # Everything below flags the array as modified and then
    # defers to `numpy.ndarray`. Zero-argument `super()` is
    # used rather than `super(self.__class__, self)`: the latter
    # resolves against the runtime class, so on any subclass of
    # TrackedArray it would call itself until RecursionError.

    def __iadd__(self, *args, **kwargs):
        """
        In-place addition.

        The i* operations are in-place and modify the array,
        so we better catch all of them.
        """
        self._dirty_hash = True
        return super().__iadd__(*args, **kwargs)

    def __isub__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__isub__(*args, **kwargs)

    def fill(self, *args, **kwargs):
        self._dirty_hash = True
        return super().fill(*args, **kwargs)

    def partition(self, *args, **kwargs):
        self._dirty_hash = True
        return super().partition(*args, **kwargs)

    def put(self, *args, **kwargs):
        self._dirty_hash = True
        return super().put(*args, **kwargs)

    def byteswap(self, *args, **kwargs):
        self._dirty_hash = True
        return super().byteswap(*args, **kwargs)

    def itemset(self, *args, **kwargs):
        self._dirty_hash = True
        return super().itemset(*args, **kwargs)

    def sort(self, *args, **kwargs):
        self._dirty_hash = True
        return super().sort(*args, **kwargs)

    def setflags(self, *args, **kwargs):
        self._dirty_hash = True
        return super().setflags(*args, **kwargs)

    def __imul__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imul__(*args, **kwargs)

    def __idiv__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__idiv__(*args, **kwargs)

    def __itruediv__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__itruediv__(*args, **kwargs)

    def __imatmul__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imatmul__(*args, **kwargs)

    def __ipow__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ipow__(*args, **kwargs)

    def __imod__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imod__(*args, **kwargs)

    def __ifloordiv__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ifloordiv__(*args, **kwargs)

    def __ilshift__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ilshift__(*args, **kwargs)

    def __irshift__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__irshift__(*args, **kwargs)

    def __iand__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__iand__(*args, **kwargs)

    def __ixor__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ixor__(*args, **kwargs)

    def __ior__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ior__(*args, **kwargs)

    def __setitem__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__setitem__(*args, **kwargs)

    def __setslice__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__setslice__(*args, **kwargs)
class Cache:
    """
    Store values in a dict for as long as the result of an
    ID function stays the same, dropping all of them once
    that result changes.
    """

    def __init__(self, id_function, force_immutable=False):
        """
        Create a cache object.

        Parameters
        ------------
        id_function : function
          Returns hashable value
        force_immutable : bool
          If set will make all numpy arrays read-only
        """
        # actual store for data
        self.cache = {}
        # number of `with` blocks currently holding the cache
        self._lock = 0
        # the ID function is not called until the first check
        self.id_current = None
        # for stored numpy arrays set `flags.writeable = False`
        self.force_immutable = bool(force_immutable)
        self._id_function = id_function

    def delete(self, key):
        """
        Remove a key from the cache, keys which are
        not stored are ignored.
        """
        self.cache.pop(key, None)

    def verify(self):
        """
        Compare the current value of id_function against the
        one the cached values were stored for, and delete all
        stored items if the two differ.
        """
        # while locked nothing is checked at all
        if self._lock != 0:
            return

        # check the hash of our data
        id_new = self._id_function()
        if id_new == self.id_current:
            # nothing changed so stored values are still good
            return

        if len(self.cache) > 0:
            log.debug(
                "%d items cleared from cache: %s",
                len(self.cache),
                str(list(self.cache.keys())),
            )
        # replace the dict directly rather than calling clear()
        self.cache = {}
        # remember which ID the (now empty) cache belongs to
        self.id_current = id_new

    def clear(self, exclude=None):
        """
        Remove elements in the cache.

        Parameters
        -----------
        exclude : list
          List of keys in cache to not clear.
        """
        if exclude is None:
            self.cache = {}
            return
        kept = {}
        for key, value in self.cache.items():
            if key in exclude:
                kept[key] = value
        self.cache = kept

    def update(self, items):
        """
        Update the cache with a set of key, value pairs without
        checking id_function.
        """
        self.cache.update(items)

        if self.force_immutable:
            # every stored value is visited, not just the new ones
            for stored in self.cache.values():
                if hasattr(stored, "flags") and len(stored.shape) > 0:
                    stored.flags.writeable = False
        self.id_set()

    def id_set(self):
        """
        Set the current ID to the value of the ID function.
        """
        self.id_current = self._id_function()

    def __getitem__(self, key):
        """
        Get an item from the cache. If the item
        is not in the cache, it will return None

        Parameters
        -------------
        key : hashable
          Key in dict

        Returns
        -------------
        cached : object, or None
          Object that was stored
        """
        self.verify()
        return self.cache.get(key)

    def __setitem__(self, key, value):
        """
        Add an item to the cache.

        Parameters
        ------------
        key : hashable
          Key to reference value
        value : any
          Value to store in cache

        Returns
        ------------
        value : any
          The same object which was passed in.
        """
        # dump the cache first if the ID function has changed
        self.verify()
        # make numpy arrays read-only if asked to
        if self.force_immutable:
            if hasattr(value, "flags") and len(value.shape) > 0:
                value.flags.writeable = False
        self.cache[key] = value

        return value

    def __contains__(self, key):
        self.verify()
        return key in self.cache

    def __len__(self):
        self.verify()
        return len(self.cache)

    def __enter__(self):
        # verification is skipped while the counter is nonzero
        self._lock += 1

    def __exit__(self, *args):
        self._lock -= 1
        # accept whatever state the data is in now as current,
        # so values stored inside the block are kept
        self.id_current = self._id_function()
class DiskCache:
    """
    Keep the results of expensive operations as files in a
    directory, with an option to expire the results. This is
    used to cache the multi-gigabyte test corpuses in
    `tests/corpus.py`
    """

    def __init__(self, path, expire_days=30):
        """
        Create a cache on disk for storing expensive results.

        Parameters
        --------------
        path : str
          A writeable location on the current file path,
          which will be created if it does not exist.
        expire_days : int or float
          How old should results be considered expired.
        """
        # expand `~` and make the location absolute
        location = os.path.abspath(os.path.expanduser(path))
        # make sure the specified path exists
        os.makedirs(location, exist_ok=True)

        # store the location for saving results
        self.path = location
        # store how old we allow results to be
        self.expire_days = expire_days

    def get(self, key, fetch):
        """
        Get a key from the cache or run a calculation.

        Parameters
        -----------
        key : str
          Key to reference item with
        fetch : function
          If key isn't stored and recent run this
          function and store its result on disk.

        Returns
        -----------
        raw : bytes
          Contents of the stored file, or whatever `fetch`
          returned if it had to be called.
        """
        # the file name is a hash of the key so it has fixed length
        digest = _sha256(key.encode("utf-8")).hexdigest()
        stored = os.path.join(self.path, digest)

        if os.path.isfile(stored):
            # age of the file in days from its modification time
            elapsed = time.time() - os.stat(stored).st_mtime
            if elapsed / 86400.0 < self.expire_days:
                # present and recent enough to be used as-is
                with open(stored, "rb") as handle:
                    return handle.read()

        log.debug(f"not in cache fetching: `{key}`")
        # missing or expired: run the expensive function
        raw = fetch()
        # keep the result on disk for the next call
        with open(stored, "wb") as handle:
            handle.write(raw)

        return raw
class DataStore(Mapping):
    """
    A class to store multiple numpy arrays and track them all
    for changes.

    Operates like a dict whose array-like values are stored
    as `TrackedArray`, other hashable values are stored as-is.
    """

    def __init__(self):
        # maps each key to its TrackedArray or hashable value
        self.data = {}

    def __iter__(self):
        return iter(self.data)

    def pop(self, key):
        """
        Remove a key and return its value, or None
        if the key was not stored.
        """
        return self.data.pop(key, None)

    def __delitem__(self, key):
        # deleting a key which is not stored is not an error
        self.data.pop(key, None)

    @property
    def mutable(self):
        """
        Is data allowed to be altered or not.

        Returns
        -----------
        is_mutable : bool
          Can data be altered in the DataStore, this is
          True until it has been explicitly set.
        """
        return getattr(self, "_mutable", True)

    @mutable.setter
    def mutable(self, value):
        """
        Is data allowed to be altered or not.

        Parameters
        ------------
        is_mutable : bool
          Should data be allowed to be altered
        """
        # make sure passed value is a bool
        is_mutable = bool(value)
        # apply the coerced flag to any arrays stored
        for v in self.data.values():
            if isinstance(v, TrackedArray):
                v.mutable = is_mutable
        # save the mutable setting
        self._mutable = is_mutable

    def is_empty(self):
        """
        Is the current DataStore empty or not.

        Returns
        ----------
        empty : bool
          False if there are items in the DataStore
        """
        if len(self.data) == 0:
            return True
        for v in self.data.values():
            if is_sequence(v):
                # the first sequence found decides the result
                return len(v) == 0
            elif bool(np.isreal(v)):
                return False
        return True

    def clear(self):
        """
        Remove all data from the DataStore.
        """
        self.data = {}

    def __getitem__(self, key):
        return self.data[key]

    def __setitem__(self, key, data):
        """
        Store an item in the DataStore.

        Parameters
        -------------
        key
          A hashable key to store under
        data
          Usually a numpy array which will be subclassed
          but anything hashable should be able to be stored.

        Raises
        -------------
        ValueError
          If the DataStore is immutable or `data` is neither
          array-like nor hashable.
        """
        # we shouldn't allow setting on immutable datastores
        if not self.mutable:
            raise ValueError("DataStore is configured immutable!")

        if isinstance(data, TrackedArray):
            # don't bother to re-track TrackedArray
            tracked = data
        elif isinstance(data, (np.ndarray, list, set, tuple)):
            # wrap data if it is array-like
            tracked = tracked_array(data)
        else:
            try:
                # will raise if this is not a hashable type
                hash(data)
            except Exception as error:
                # `Exception` rather than `BaseException` so that
                # KeyboardInterrupt and SystemExit pass through
                raise ValueError(f"unhashable `{key}:{type(data)}`") from error
            tracked = data

        # apply our mutability setting only if it was explicitly
        # set, and only to arrays: plain hashable values such as
        # `int` or `str` have no `mutable` attribute to assign
        if hasattr(self, "_mutable") and isinstance(tracked, TrackedArray):
            tracked.mutable = self.mutable
        # store data
        self.data[key] = tracked

    def __contains__(self, key):
        return key in self.data

    def __len__(self):
        return len(self.data)

    def update(self, values):
        """
        Store every key-value pair of a dict, going through the
        same checks as setting each item individually.

        Parameters
        ------------
        values : dict
          Items to store

        Raises
        ------------
        ValueError
          If `values` is not a dict.
        """
        if not isinstance(values, dict):
            raise ValueError("Update only implemented for dicts")
        for key, value in values.items():
            self[key] = value

    def __hash__(self):
        """
        Get a hash reflecting everything in the DataStore.

        Returns
        ----------
        hash : int
          Hash of the hashes of the stored values.
        """
        # only hash values that aren't None
        # or if they are arrays require length greater than zero
        return hash_fast(
            np.array(
                [
                    hash(v)
                    for v in self.data.values()
                    if v is not None and (not hasattr(v, "__len__") or len(v) > 0)
                ],
                dtype=np.int64,
            ).tobytes()
        )