Coverage for trimesh/caching.py: 89%

267 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-24 04:40 +0000

1""" 

2caching.py 

3----------- 

4 

5Functions and classes that help with tracking changes 

6in `numpy.ndarray` and clearing cached values based 

7on those changes. 

8 

9You should really `pip install xxhash`: 

10 

11``` 

12In [23]: %timeit int(blake2b(d).hexdigest(), 16) 

13102 us +/- 684 ns per loop 

14 

15In [24]: %timeit int(sha256(d).hexdigest(), 16) 

16142 us +/- 3.73 us 

17 

18In [25]: %timeit xxh3_64_intdigest(d) 

193.37 us +/- 116 ns per loop 

20``` 

21""" 

22 

23import os 

24import time 

25from functools import wraps 

26from hashlib import blake2b as _blake2b 

27from hashlib import sha256 as _sha256 

28 

29import numpy as np 

30 

31from .constants import log 

32from .util import is_sequence 

33 

34try: 

35 from collections.abc import Mapping 

36except BaseException: 

37 from collections.abc import Mapping 

38 

39 

def sha256(item) -> int:
    """
    Hash bytes-like data with SHA-256 and return the
    digest interpreted as an integer.

    Parameters
    ------------
    item : bytes-like
      Data to be hashed.

    Returns
    ------------
    hashed : int
      The SHA-256 digest as a big-endian integer.
    """
    # the raw digest read as a big-endian integer is the same
    # value as parsing the hexadecimal digest in base 16
    digest = _sha256(item).digest()
    return int.from_bytes(digest, "big")

42 

43 

def hash_fallback(item):
    """
    Hash bytes-like data using `hashlib.blake2b` and return
    the digest interpreted as an integer.

    Parameters
    ------------
    item : bytes-like
      Data to be hashed.

    Returns
    ------------
    hashed : int
      The blake2b digest as a big-endian integer.
    """
    # this hash is only used for change detection so
    # flag it as not being used for security purposes
    hasher = _blake2b(item, usedforsecurity=False)
    return int.from_bytes(hasher.digest(), "big")

46 

47 

# Pick the fastest hashing function that can be imported and
# expose it as `hash_fast`; xxhash is up to 30x faster than
# sha256: `pip install xxhash`
try:
    # newest version of algorithm
    from xxhash import xxh3_64_intdigest as hash_fast
except BaseException:
    # deliberately broad: any failure importing the optional
    # dependency should fall through to the next option
    try:
        # older version of the algorithm
        from xxhash import xxh64_intdigest as hash_fast
    except BaseException:
        # use hashlib as a fallback hashing library
        # note the space after the backtick: the original pieces
        # were concatenated into "...xxhash`for 50x faster..."
        log.debug(
            "falling back to hashlib hashing: "
            "`pip install xxhash` for 50x faster cache checks"
        )
        hash_fast = hash_fallback

65 

66 

def tracked_array(array, dtype=None):
    """
    Convert array-like data into a `TrackedArray`.

    The data is first made C-contiguous and is then viewed
    as the `TrackedArray` subclass rather than constructed
    through the subclass directly.

    Parameters
    ------------
    array : array-like object or None
      Data to be tracked, `None` produces an empty array.
    dtype : np.dtype
      Which dtype to use for the array

    Returns
    ------------
    tracked : TrackedArray
      Contains input array data.
    """
    # a value of None is treated as an empty list
    source = [] if array is None else array
    # force a contiguous layout before taking the subclass view
    contiguous = np.ascontiguousarray(source, dtype=dtype)
    tracked = contiguous.view(TrackedArray)
    # the view of a contiguous array should still be contiguous
    assert tracked.flags["C_CONTIGUOUS"]
    return tracked

95 

96 

def cache_decorator(function):
    """
    Decorator that turns a method into a property whose value
    is computed once and then served from the `_cache`
    attribute of the object it is accessed on.

    Parameters
    ------------
    function : method
      This is used as a decorator:
      ```
      @cache_decorator
      def foo(self, things):
        return 'happy days'
      ```

    Returns
    ------------
    cached : property
      Property wrapping `function` with cache lookups.
    """
    # values are stored under the name of the wrapped function
    key = function.__name__

    # `wraps` keeps the name and docstring of the original
    @wraps(function)
    def get_cached(*args, **kwargs):
        """
        Return the stored value if there is one, otherwise run
        the function and store what it returned.
        """
        instance = args[0]
        # validate the cache once ourselves, then use the plain
        # dict underneath so it isn't validated again on access
        instance._cache.verify()
        stored = instance._cache.cache
        if key in stored:
            return stored[key]

        # nothing stored under this name so compute it
        result = function(*args, **kwargs)

        # optionally make non-scalar arrays read-only
        if instance._cache.force_immutable:
            if hasattr(result, "flags") and len(result.shape) > 0:
                result.flags.writeable = False

        instance._cache.cache[key] = result
        return result

    # expose as a property so it is accessed like an
    # attribute rather than called like a function
    return property(get_cached)

150 

151 

class TrackedArray(np.ndarray):
    """
    Subclass of numpy.ndarray that provides hash methods
    to track changes.

    General method is to aggressively set 'modified' flags
    on operations which might (but don't necessarily) alter
    the array, ideally we sometimes compute hashes when we
    don't need to, but we don't return wrong hashes ever.

    The flag is stored as `_dirty_hash` and the most recently
    computed hash is stored as `_hashed`.

    Every overridden mutator uses zero-argument `super()`:
    `super(self.__class__, self)` resolves relative to the
    runtime class and recurses forever once this class
    is subclassed.

    Methods
    ----------
    __hash__ : int
      Hashes the array bytes using the module-level
      `hash_fast` function.
    """

    def __array_finalize__(self, obj):
        """
        Sets a modified flag on every TrackedArray
        This flag will be set on every change as well as
        during copies and certain types of slicing.

        Parameters
        ------------
        obj : object
          The object this array was created from; if it is
          the same type as `self` it is flagged as modified too.
        """
        self._dirty_hash = True
        if isinstance(obj, type(self)):
            obj._dirty_hash = True

    def __array_wrap__(self, out_arr, context=None, *args, **kwargs):
        """
        Return a numpy scalar if array is 0d.
        See https://github.com/numpy/numpy/issues/5819
        """
        if out_arr.ndim:
            return np.ndarray.__array_wrap__(self, out_arr, context, *args, **kwargs)
        # Match numpy's behavior and return a numpy dtype scalar
        return out_arr[()]

    @property
    def mutable(self):
        """
        Is the array writeable or not.

        Returns
        ----------
        mutable : bool
          The value of the `WRITEABLE` flag.
        """
        return self.flags["WRITEABLE"]

    @mutable.setter
    def mutable(self, value):
        """
        Set whether the array may be written to.

        Parameters
        ------------
        value : bool
          Assigned to `flags.writeable`.
        """
        self.flags.writeable = value

    def __hash__(self):
        """
        Return a fast hash of the contents of the array.

        Returns
        -------------
        hash : int
          A hash of the array contents.
        """
        # reuse the stored hash if nothing has flagged a change
        if not self._dirty_hash and hasattr(self, "_hashed"):
            # we have a valid hash without recomputing.
            return self._hashed

        # run a hashing function on the C-order bytes copy
        hashed = hash_fast(self.tobytes(order="C"))

        # assign the value and set the flag
        self._hashed = hashed
        self._dirty_hash = False

        return hashed

    # The methods below all may modify the array in-place, so
    # each one flags the hash as dirty before deferring to the
    # parent class implementation.

    def __iadd__(self, *args, **kwargs):
        """
        In-place addition.

        The i* operations are in-place and modify the array,
        so we better catch all of them.
        """
        self._dirty_hash = True
        return super().__iadd__(*args, **kwargs)

    def __isub__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__isub__(*args, **kwargs)

    def fill(self, *args, **kwargs):
        self._dirty_hash = True
        return super().fill(*args, **kwargs)

    def partition(self, *args, **kwargs):
        self._dirty_hash = True
        return super().partition(*args, **kwargs)

    def put(self, *args, **kwargs):
        self._dirty_hash = True
        return super().put(*args, **kwargs)

    def byteswap(self, *args, **kwargs):
        self._dirty_hash = True
        return super().byteswap(*args, **kwargs)

    def itemset(self, *args, **kwargs):
        # NOTE(review): newer numpy releases appear to have removed
        # `ndarray.itemset`, in which case calling this raises
        # AttributeError just as it would on a plain ndarray
        self._dirty_hash = True
        return super().itemset(*args, **kwargs)

    def sort(self, *args, **kwargs):
        self._dirty_hash = True
        return super().sort(*args, **kwargs)

    def setflags(self, *args, **kwargs):
        self._dirty_hash = True
        return super().setflags(*args, **kwargs)

    def __imul__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imul__(*args, **kwargs)

    def __idiv__(self, *args, **kwargs):
        # NOTE(review): looks like a Python 2 hook kept for
        # compatibility; confirm whether it is still needed
        self._dirty_hash = True
        return super().__idiv__(*args, **kwargs)

    def __itruediv__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__itruediv__(*args, **kwargs)

    def __imatmul__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imatmul__(*args, **kwargs)

    def __ipow__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ipow__(*args, **kwargs)

    def __imod__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__imod__(*args, **kwargs)

    def __ifloordiv__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ifloordiv__(*args, **kwargs)

    def __ilshift__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ilshift__(*args, **kwargs)

    def __irshift__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__irshift__(*args, **kwargs)

    def __iand__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__iand__(*args, **kwargs)

    def __ixor__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ixor__(*args, **kwargs)

    def __ior__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__ior__(*args, **kwargs)

    def __setitem__(self, *args, **kwargs):
        self._dirty_hash = True
        return super().__setitem__(*args, **kwargs)

    def __setslice__(self, *args, **kwargs):
        # NOTE(review): looks like a Python 2 hook kept for
        # compatibility; confirm whether it is still needed
        self._dirty_hash = True
        return super().__setslice__(*args, **kwargs)

322 

323 

class Cache:
    """
    A dict-backed store of values which is emptied whenever
    the result of an ID function changes.
    """

    def __init__(self, id_function, force_immutable=False):
        """
        Create a cache object.

        Parameters
        ------------
        id_function : function
          Returns hashable value
        force_immutable : bool
          If set will make all numpy arrays read-only
        """
        # the dict that actually holds the stored values
        self.cache = {}
        # counts how many `with` blocks we are currently inside
        self._lock = 0
        # the ID function is not called here, so there is no ID yet
        self.id_current = None
        # when set stored numpy arrays get `flags.writeable = False`
        self.force_immutable = bool(force_immutable)
        self._id_function = id_function

    def delete(self, key):
        """
        Remove a key from the cache, doing nothing
        if the key is not stored.
        """
        self.cache.pop(key, None)

    def verify(self):
        """
        Compare the current value of id_function against the
        stored one and discard every stored item if it changed.
        """
        # inside a lock nothing is checked
        if self._lock != 0:
            return

        # evaluate the ID for the current state of our data
        latest = self._id_function()
        changed = latest != self.id_current
        if not changed:
            return

        count = len(self.cache)
        if count > 0:
            log.debug(
                "%d items cleared from cache: %s",
                count,
                str(list(self.cache)),
            )
        # replace the dict directly rather than calling clear()
        self.cache = {}
        # remember the ID the now-empty cache corresponds to
        self.id_current = latest

    def clear(self, exclude=None):
        """
        Remove elements in the cache.

        Parameters
        -----------
        exclude : list
          List of keys in cache to not clear.
        """
        if exclude is None:
            self.cache = {}
            return
        # keep only the items that were asked to be kept
        self.cache = {
            name: stored for name, stored in self.cache.items() if name in exclude
        }

    def update(self, items):
        """
        Add key-value pairs to the cache without checking
        id_function first, then store the current ID.
        """
        self.cache.update(items)

        if self.force_immutable:
            # every stored non-scalar array becomes read-only
            for stored in self.cache.values():
                if hasattr(stored, "flags") and len(stored.shape) > 0:
                    stored.flags.writeable = False
        self.id_set()

    def id_set(self):
        """
        Set the current ID to the value of the ID function.
        """
        self.id_current = self._id_function()

    def __getitem__(self, key):
        """
        Get an item from the cache, or None if
        the key is not stored.

        Parameters
        -------------
        key : hashable
          Key in dict

        Returns
        -------------
        cached : object, or None
          Object that was stored
        """
        self.verify()
        return self.cache.get(key)

    def __setitem__(self, key, value):
        """
        Add an item to the cache.

        Parameters
        ------------
        key : hashable
          Key to reference value
        value : any
          Value to store in cache

        Returns
        ------------
        value : any
          The value that was passed in.
        """
        # dump the cache first if the ID function has changed
        self.verify()
        # make non-scalar numpy arrays read-only if asked to
        if self.force_immutable:
            if hasattr(value, "flags") and len(value.shape) > 0:
                value.flags.writeable = False
        self.cache[key] = value
        return value

    def __contains__(self, key):
        self.verify()
        return key in self.cache

    def __len__(self):
        self.verify()
        return len(self.cache)

    def __enter__(self):
        # while the counter is non-zero `verify` does nothing
        self._lock += 1

    def __exit__(self, *args):
        self._lock -= 1
        # store the ID as it is when leaving the block
        self.id_current = self._id_function()

474 

475 

class DiskCache:
    """
    Store results of expensive operations on disk
    with an option to expire the results. This is used
    to cache the multi-gigabyte test corpuses in
    `tests/corpus.py`
    """

    def __init__(self, path, expire_days=30):
        """
        Create a cache on disk for storing expensive results.

        Parameters
        --------------
        path : str
          A writeable location on the current file path,
          which is created if it doesn't exist.
        expire_days : int or float
          How old should results be considered expired.
        """
        # results older than this many days are fetched again
        self.expire_days = expire_days
        # expand `~` and make the storage location absolute
        expanded = os.path.expanduser(path)
        self.path = os.path.abspath(expanded)
        # create the directory if it isn't there already
        os.makedirs(self.path, exist_ok=True)

    def get(self, key, fetch):
        """
        Get a key from the cache or run a calculation.

        Parameters
        -----------
        key : str
          Key to reference item with
        fetch : function
          If key isn't stored and recent run this
          function and store its result on disk.

        Returns
        -----------
        raw : bytes
          Contents of the stored file, or the value
          returned by `fetch` if nothing recent was stored.
        """
        # file names are the hex SHA-256 of the key
        # which gives every key a fixed-length name
        digest = _sha256(key.encode("utf-8")).hexdigest()
        location = os.path.join(self.path, digest)

        if os.path.isfile(location):
            # age of the stored file converted from seconds to days
            elapsed = time.time() - os.path.getmtime(location)
            if elapsed / 86400.0 < self.expire_days:
                # stored and recent enough to be used directly
                with open(location, "rb") as handle:
                    return handle.read()

        log.debug(f"not in cache fetching: `{key}`")
        # nothing usable was stored so run the expensive function
        payload = fetch()
        # save the result for subsequent calls
        with open(location, "wb") as handle:
            handle.write(payload)

        return payload

541 

542 

class DataStore(Mapping):
    """
    A class to store multiple numpy arrays and track them all
    for changes.

    Operates like a dict whose array-like values are converted
    to `TrackedArray`; other hashable values are stored as-is.
    """

    def __init__(self):
        # the dict holding the stored values
        self.data = {}

    def __iter__(self):
        return iter(self.data)

    def pop(self, key):
        """
        Remove a key and return its value, or None
        if the key was not stored.
        """
        return self.data.pop(key, None)

    def __delitem__(self, key):
        # deleting a key that isn't stored is not an error
        self.data.pop(key, None)

    @property
    def mutable(self):
        """
        Is data allowed to be altered or not.

        Returns
        -----------
        is_mutable : bool
          Can data be altered in the DataStore,
          True if it was never explicitly set.
        """
        return getattr(self, "_mutable", True)

    @mutable.setter
    def mutable(self, value):
        """
        Is data allowed to be altered or not.

        Parameters
        ------------
        is_mutable : bool
          Should data be allowed to be altered
        """
        # make sure passed value is a bool
        is_mutable = bool(value)
        # apply the same coerced flag to any tracked arrays stored
        for v in self.data.values():
            if isinstance(v, TrackedArray):
                v.mutable = is_mutable
        # save the mutable setting
        self._mutable = is_mutable

    def is_empty(self):
        """
        Is the current DataStore empty or not.

        The values are checked in order and the first one that
        is a sequence or a real value decides the result.

        Returns
        ----------
        empty : bool
          False if there are items in the DataStore
        """
        if len(self.data) == 0:
            return True
        for v in self.data.values():
            if is_sequence(v):
                # the first sequence found decides the result
                return len(v) == 0
            elif bool(np.isreal(v)):
                return False
        return True

    def clear(self):
        """
        Remove all data from the DataStore.
        """
        self.data = {}

    def __getitem__(self, key):
        return self.data[key]

    def __setitem__(self, key, data):
        """
        Store an item in the DataStore.

        Parameters
        -------------
        key
          A hashable key to store under
        data
          Usually a numpy array which will be subclassed
          but anything hashable should be able to be stored.

        Raises
        -------------
        ValueError
          If the DataStore is immutable or `data` is
          neither array-like nor hashable.
        """
        # we shouldn't allow setting on immutable datastores
        if not self.mutable:
            raise ValueError("DataStore is configured immutable!")

        if isinstance(data, np.ndarray):
            # don't bother to re-track a TrackedArray
            # but do wrap any other kind of array
            tracked = data if isinstance(data, TrackedArray) else tracked_array(data)
        elif isinstance(data, (list, set, tuple)):
            # wrap data if it is array-like
            tracked = tracked_array(data)
        else:
            try:
                # will raise if this is not a hashable type
                hash(data)
            except Exception as exc:
                raise ValueError(f"unhashable `{key}:{type(data)}`") from exc
            tracked = data

        # apply our mutability setting only if it was explicitly set
        # and only to values that have a `mutable` attribute: plain
        # hashables like `int` or `str` do not and assigning it
        # to them unconditionally raised an AttributeError
        if hasattr(self, "_mutable") and hasattr(tracked, "mutable"):
            tracked.mutable = self.mutable
        # store data
        self.data[key] = tracked

    def __contains__(self, key):
        return key in self.data

    def __len__(self):
        return len(self.data)

    def update(self, values):
        """
        Store every key-value pair of a dict.

        Parameters
        ------------
        values : dict
          Items to store.

        Raises
        ------------
        ValueError
          If `values` is not a dict.
        """
        if not isinstance(values, dict):
            raise ValueError("Update only implemented for dicts")
        for key, value in values.items():
            self[key] = value

    def __hash__(self):
        """
        Get a hash reflecting everything in the DataStore.

        Returns
        ----------
        hash : int
          Hash of the hashes of the stored values.
        """
        # only hash values that aren't None
        # or if they are arrays require length greater than zero
        return hash_fast(
            np.array(
                [
                    hash(v)
                    for v in self.data.values()
                    if v is not None and (not hasattr(v, "__len__") or len(v) > 0)
                ],
                dtype=np.int64,
            ).tobytes()
        )

693 )