Coverage for trimesh/grouping.py: 94%

249 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-24 04:40 +0000

1""" 

2grouping.py 

3------------- 

4 

5Functions for grouping values and rows. 

6""" 

7 

8import numpy as np 

9 

10from . import util 

11from .constants import log, tol 

12from .typed import ArrayLike, Integer, NDArray, Number, Sequence 

13 

14try: 

15 from scipy.spatial import cKDTree 

16except BaseException as E: 

17 # wrapping just ImportError fails in some cases 

18 # will raise the error when someone tries to use KDtree 

19 from . import exceptions 

20 

21 cKDTree = exceptions.ExceptionWrapper(E) 

22 

23 

24def merge_vertices( 

25 mesh, 

26 merge_tex: bool | None = None, 

27 merge_norm: bool | None = None, 

28 digits_vertex: Integer | None = None, 

29 digits_norm: Integer | None = None, 

30 digits_uv: Integer | None = None, 

31): 

32 """ 

33 Removes duplicate vertices, grouped by position and 

34 optionally texture coordinate and normal. 

35 

36 Parameters 

37 ------------- 

38 mesh : Trimesh object 

39 Mesh to merge vertices on 

40 merge_tex : bool 

41 If True textured meshes with UV coordinates will 

42 have vertices merged regardless of UV coordinates 

43 merge_norm : bool 

44 If True, meshes with vertex normals will have 

45 vertices merged ignoring different normals 

46 digits_vertex : None or int 

47 Number of digits to consider for vertex position 

48 digits_norm : int 

49 Number of digits to consider for unit normals 

50 digits_uv : int 

51 Number of digits to consider for UV coordinates 

52 """ 

53 # no vertices so exit early 

54 if len(mesh.vertices) == 0: 

55 return 

56 if merge_tex is None: 

57 merge_tex = False 

58 if merge_norm is None: 

59 merge_norm = False 

60 if digits_norm is None: 

61 digits_norm = 2 

62 if digits_uv is None: 

63 digits_uv = 4 

64 if digits_vertex is None: 

65 # use tol.merge if digit precision not passed 

66 digits_vertex = util.decimal_to_digits(tol.merge) 

67 

68 # if we have a ton of unreferenced vertices it will 

69 # make the unique_rows call super slow so cull first 

70 if hasattr(mesh, "faces") and len(mesh.faces) > 0: 

71 referenced = np.zeros(len(mesh.vertices), dtype=bool) 

72 referenced[mesh.faces] = True 

73 else: 

74 # this is used for geometry without faces 

75 referenced = np.ones(len(mesh.vertices), dtype=bool) 

76 

77 # collect vertex attributes into sequence we can stack 

78 stacked = [mesh.vertices * (10**digits_vertex)] 

79 

80 # UV texture visuals require us to update the 

81 # vertices and normals differently 

82 if ( 

83 not merge_tex 

84 and mesh.visual.defined 

85 and mesh.visual.kind == "texture" 

86 and mesh.visual.uv is not None 

87 and len(mesh.visual.uv) == len(mesh.vertices) 

88 ): 

89 # get an array with vertices and UV coordinates 

90 # converted to integers at requested precision 

91 stacked.append(mesh.visual.uv * (10**digits_uv)) 

92 

93 # check to see if we have vertex normals 

94 normals = mesh._cache["vertex_normals"] 

95 if not merge_norm and np.shape(normals) == mesh.vertices.shape: 

96 stacked.append(normals * (10**digits_norm)) 

97 

98 # stack collected vertex properties and round to integer 

99 stacked = np.column_stack(stacked).round().astype(np.int64) 

100 

101 # check unique rows of referenced vertices 

102 u, i = unique_rows(stacked[referenced], keep_order=True) 

103 

104 # construct an inverse using the subset 

105 inverse = np.zeros(len(mesh.vertices), dtype=np.int64) 

106 inverse[referenced] = i 

107 # get the vertex mask 

108 mask = np.nonzero(referenced)[0][u] 

109 # run the update including normals and UV coordinates 

110 mesh.update_vertices(mask=mask, inverse=inverse) 

111 

112 

113def group(values, min_len: Integer | None = None, max_len: Integer | None = None): 

114 """ 

115 Return the indices of values that are identical 

116 

117 Parameters 

118 ---------- 

119 values : (n,) int 

120 Values to group 

121 min_len : int 

122 The shortest group allowed 

123 All groups will have len >= min_length 

124 max_len : int 

125 The longest group allowed 

126 All groups will have len <= max_length 

127 

128 Returns 

129 ---------- 

130 groups : sequence 

131 Contains indices to form groups 

132 IE [0,1,0,1] returns [[0,2], [1,3]] 

133 """ 

134 original = np.asanyarray(values) 

135 

136 # save the sorted order and then apply it 

137 order = original.argsort() 

138 values = original[order] 

139 

140 # find the indexes which are duplicates 

141 if values.dtype.kind == "f": 

142 # for floats in a sorted array, neighbors are not duplicates 

143 # if the difference between them is greater than approximate zero 

144 nondupe = np.greater(np.abs(np.diff(values)), tol.zero) 

145 else: 

146 # for ints and strings we can check exact non- equality 

147 # for all other types this will only work if they defined 

148 # an __eq__ 

149 nondupe = values[1:] != values[:-1] 

150 

151 dupe_idx = np.append(0, np.nonzero(nondupe)[0] + 1) 

152 

153 # start with a mask that marks everything as ok 

154 dupe_ok = np.ones(len(dupe_idx), dtype=bool) 

155 

156 # calculate the length of each group from their index 

157 dupe_len = np.diff(np.concatenate((dupe_idx, [len(values)]))) 

158 

159 # cull by length if requested 

160 if min_len is not None or max_len is not None: 

161 if min_len is not None: 

162 dupe_ok &= dupe_len >= min_len 

163 if max_len is not None: 

164 dupe_ok &= dupe_len <= max_len 

165 

166 groups = [order[i : (i + j)] for i, j in zip(dupe_idx[dupe_ok], dupe_len[dupe_ok])] 

167 return groups 

168 

169 

170def hashable_rows( 

171 data: ArrayLike, digits: Integer | None = None, allow_int: bool = True 

172) -> NDArray: 

173 """ 

174 We turn our array into integers based on the precision 

175 given by digits and then put them in a hashable format. 

176 

177 Parameters 

178 --------- 

179 data : (n, m) array 

180 Input data 

181 digits : int or None 

182 How many digits to add to hash if data is floating point 

183 If None, tol.merge will be used 

184 

185 Returns 

186 --------- 

187 hashable : (n,) 

188 May return as a `np.void` or a `np.uint64` 

189 """ 

190 # if there is no data return immediately 

191 if len(data) == 0: 

192 return np.array([], dtype=np.uint64) 

193 

194 # get array as integer to precision we care about 

195 as_int = float_to_int(data, digits=digits) 

196 

197 # if it is flat integers already return 

198 if len(as_int.shape) == 1: 

199 return as_int 

200 

201 # if array is 2D and smallish, we can try bitbanging 

202 # this is significantly faster than the custom dtype 

203 if allow_int and len(as_int.shape) == 2 and as_int.shape[1] <= 4: 

204 # can we pack the whole row into a single 64 bit integer 

205 precision = int(np.floor(64 / as_int.shape[1])) 

206 

207 # get the extreme values of the data set 

208 d_min, d_max = as_int.min(), as_int.max() 

209 # since we are quantizing the data down we need every value 

210 # to fit in a partial integer so we have to check against extrema 

211 threshold = (2 ** (precision - 1)) - 1 

212 

213 # if the data is within the range of our precision threshold 

214 if d_max < threshold and d_min > -threshold: 

215 # the resulting package 

216 hashable = np.zeros(len(as_int), dtype=np.uint64) 

217 # offset to the middle of the unsigned integer range 

218 # this array should contain only positive values 

219 bitbang = (as_int.T + (threshold + 1)).astype(np.uint64) 

220 # loop through each column and bitwise xor to combine 

221 # make sure as_int is int64 otherwise bit offset won't work 

222 for offset, column in enumerate(bitbang): 

223 # will modify hashable in place 

224 np.bitwise_xor(hashable, column << (offset * precision), out=hashable) 

225 return hashable 

226 

227 # reshape array into magical data type that is weird but works with unique 

228 dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1])) 

229 # make sure result is contiguous and flat 

230 result = np.ascontiguousarray(as_int).view(dtype).reshape(-1) 

231 result.flags["WRITEABLE"] = False 

232 

233 return result 

234 

235 

236def float_to_int(data, digits: Integer | None = None) -> NDArray[np.int64]: 

237 """ 

238 Given a numpy array of float/bool/int, return as integers. 

239 

240 Parameters 

241 ------------- 

242 data : (n, d) float, int, or bool 

243 Input data 

244 digits : float or int 

245 Precision for float conversion 

246 

247 Returns 

248 ------------- 

249 as_int : (n, d) int 

250 Data as integers 

251 """ 

252 # convert to any numpy array 

253 data = np.asanyarray(data) 

254 

255 # we can early-exit if we've been passed data that is already 

256 # an integer, unsigned integer, boolean, or empty 

257 if data.dtype == np.int64: 

258 return data 

259 elif data.dtype.kind in "iub" or data.size == 0: 

260 return data.astype(np.int64) 

261 elif data.dtype.kind != "f": 

262 # if it's not a floating point try to make it one 

263 data = data.astype(np.float64) 

264 

265 if digits is None: 

266 # get digits from `tol.merge` 

267 digits = util.decimal_to_digits(tol.merge) 

268 elif not isinstance(digits, (int, np.integer)): 

269 raise TypeError(f"Digits must be `None` or `int`, not `{type(digits)}`") 

270 

271 # multiply by requested power of ten 

272 # then subtract small epsilon to avoid "go either way" rounding 

273 # then do the rounding and convert to integer 

274 return np.round((data * 10**digits) - 1e-6).astype(np.int64) 

275 

276 

277def unique_ordered( 

278 data: ArrayLike, return_index: bool = False, return_inverse: bool = False 

279): 

280 """ 

281 Returns the same as np.unique, but ordered as per the 

282 first occurrence of the unique value in data. 

283 

284 Examples 

285 --------- 

286 In [1]: a = [0, 3, 3, 4, 1, 3, 0, 3, 2, 1] 

287 

288 In [2]: np.unique(a) 

289 Out[2]: array([0, 1, 2, 3, 4]) 

290 

291 In [3]: trimesh.grouping.unique_ordered(a) 

292 Out[3]: array([0, 3, 4, 1, 2]) 

293 """ 

294 # uniques are the values, sorted 

295 # index is the value in the original `data` 

296 # i.e. `data[index] == unique` 

297 # inverse is how to re-construct `data` from `unique` 

298 # i.e. `unique[inverse] == data` 

299 unique, index, inverse = np.unique(data, return_index=True, return_inverse=True) 

300 

301 # we want to maintain the original index order 

302 order = index.argsort() 

303 

304 if not return_index and not return_inverse: 

305 return unique[order] 

306 

307 # collect return values 

308 # start with the unique values in original order 

309 result = [unique[order]] 

310 # the new index values 

311 if return_index: 

312 # re-order the index in the original array 

313 result.append(index[order]) 

314 if return_inverse: 

315 # create the new inverse from the order of the order 

316 result.append(order.argsort()[inverse]) 

317 

318 return result 

319 

320 

321def unique_bincount( 

322 values: ArrayLike, 

323 minlength: Integer = 0, 

324 return_inverse: bool = False, 

325 return_counts: bool = False, 

326): 

327 """ 

328 For arrays of integers find unique values using bin counting. 

329 Roughly 10x faster for correct input than np.unique 

330 

331 Parameters 

332 -------------- 

333 values : (n,) int 

334 Values to find unique members of 

335 minlength : int 

336 Maximum value that will occur in values (values.max()) 

337 return_inverse : bool 

338 If True, return an inverse such that unique[inverse] == values 

339 return_counts : bool 

340 If True, also return the number of times each 

341 unique item appears in values 

342 

343 Returns 

344 ------------ 

345 unique : (m,) int 

346 Unique values in original array 

347 inverse : (n,) int, optional 

348 An array such that unique[inverse] == values 

349 Only returned if return_inverse is True 

350 counts : (m,) int, optional 

351 An array holding the counts of each unique item in values 

352 Only returned if return_counts is True 

353 """ 

354 values = np.asanyarray(values) 

355 if len(values.shape) != 1 or values.dtype.kind != "i": 

356 raise ValueError("input must be 1D integers!") 

357 

358 try: 

359 # count the number of occurrences of each value 

360 counts = np.bincount(values, minlength=minlength) 

361 except TypeError: 

362 # casting failed on 32 bit windows 

363 log.warning("casting failed, falling back!") 

364 # fall back to numpy unique 

365 return np.unique( 

366 values, return_inverse=return_inverse, return_counts=return_counts 

367 ) 

368 

369 # which bins are occupied at all 

370 # counts are integers so this works 

371 unique_bin = counts.astype(bool) 

372 

373 # which values are unique 

374 # indexes correspond to original values 

375 unique = np.where(unique_bin)[0] 

376 ret = (unique,) 

377 

378 if return_inverse: 

379 # find the inverse to reconstruct original 

380 inverse = (np.cumsum(unique_bin) - 1)[values] 

381 ret += (inverse,) 

382 

383 if return_counts: 

384 unique_counts = counts[unique] 

385 ret += (unique_counts,) 

386 

387 if len(ret) == 1: 

388 return ret[0] 

389 return ret 

390 

391 

392def merge_runs(data: ArrayLike, digits: Integer | None = None): 

393 """ 

394 Merge duplicate sequential values. This differs from unique_ordered 

395 in that values can occur in multiple places in the sequence, but 

396 only consecutive repeats are removed 

397 

398 Parameters 

399 ----------- 

400 data: (n,) float or int 

401 

402 Returns 

403 -------- 

404 merged: (m,) float or int 

405 

406 Examples 

407 --------- 

408 In [1]: a 

409 Out[1]: 

410 array([-1, -1, -1, 0, 0, 1, 1, 2, 0, 

411 3, 3, 4, 4, 5, 5, 6, 6, 7, 

412 7, 8, 8, 9, 9, 9]) 

413 

414 In [2]: trimesh.grouping.merge_runs(a) 

415 Out[2]: array([-1, 0, 1, 2, 0, 3, 4, 5, 6, 7, 8, 9]) 

416 """ 

417 if digits is None: 

418 epsilon = tol.merge 

419 else: 

420 epsilon = 10 ** (-digits) 

421 

422 data = np.asanyarray(data) 

423 mask = np.zeros(len(data), dtype=bool) 

424 mask[0] = True 

425 mask[1:] = np.abs(data[1:] - data[:-1]) > epsilon 

426 

427 return data[mask] 

428 

429 

430def unique_float( 

431 data, 

432 return_index: bool = False, 

433 return_inverse: bool = False, 

434 digits: Integer | None = None, 

435): 

436 """ 

437 Identical to the numpy.unique command, except evaluates floating point 

438 numbers, using a specified number of digits. 

439 

440 If digits isn't specified, the library default TOL_MERGE will be used. 

441 """ 

442 data = np.asanyarray(data) 

443 as_int = float_to_int(data, digits) 

444 _junk, unique, inverse = np.unique(as_int, return_index=True, return_inverse=True) 

445 

446 if (not return_index) and (not return_inverse): 

447 return data[unique] 

448 

449 result = [data[unique]] 

450 

451 if return_index: 

452 result.append(unique) 

453 if return_inverse: 

454 result.append(inverse) 

455 return tuple(result) 

456 

457 

458def unique_rows(data, digits=None, keep_order=False): 

459 """ 

460 Returns indices of unique rows. It will return the 

461 first occurrence of a row that is duplicated: 

462 [[1,2], [3,4], [1,2]] will return [0,1] 

463 

464 Parameters 

465 --------- 

466 data : (n, m) array 

467 Floating point data 

468 digits : int or None 

469 How many digits to consider 

470 

471 Returns 

472 -------- 

473 unique : (j,) int 

474 Index in data which is a unique row 

475 inverse : (n,) int 

476 Array to reconstruct original 

477 Example: data[unique][inverse] == data 

478 """ 

479 # get rows hashable so we can run unique function on it 

480 rows = hashable_rows(data, digits=digits) 

481 

482 # we are throwing away the first value which is the 

483 # garbage row-hash and only returning index and inverse 

484 if keep_order: 

485 # keeps order of original occurrence 

486 return unique_ordered(rows, return_index=True, return_inverse=True)[1:] 

487 # returns values sorted by row-hash but since our row-hash 

488 # were pretty much garbage the sort order isn't meaningful 

489 return np.unique(rows, return_index=True, return_inverse=True)[1:] 

490 

491 

492def unique_value_in_row(data, unique=None): 

493 """ 

494 For a 2D array of integers find the position of a 

495 value in each row which only occurs once. 

496 

497 If there are more than one value per row which 

498 occur once, the last one is returned. 

499 

500 Parameters 

501 ---------- 

502 data : (n, d) int 

503 Data to check values 

504 unique : (m,) int 

505 List of unique values contained in data. 

506 Generated from np.unique if not passed 

507 

508 Returns 

509 --------- 

510 result : (n, d) bool 

511 With one or zero True values per row. 

512 

513 

514 Examples 

515 ------------------------------------- 

516 In [0]: r = np.array([[-1, 1, 1], 

517 [-1, 1, -1], 

518 [-1, 1, 1], 

519 [-1, 1, -1], 

520 [-1, 1, -1]], dtype=np.int8) 

521 

522 In [1]: unique_value_in_row(r) 

523 Out[1]: 

524 array([[ True, False, False], 

525 [False, True, False], 

526 [ True, False, False], 

527 [False, True, False], 

528 [False, True, False]], dtype=bool) 

529 

530 In [2]: unique_value_in_row(r).sum(axis=1) 

531 Out[2]: array([1, 1, 1, 1, 1]) 

532 

533 In [3]: r[unique_value_in_row(r)] 

534 Out[3]: array([-1, 1, -1, 1, 1], dtype=int8) 

535 """ 

536 if unique is None: 

537 unique = np.unique(data) 

538 data = np.asanyarray(data) 

539 result = np.zeros_like(data, dtype=bool, subok=False) 

540 for value in unique: 

541 test = np.equal(data, value) 

542 test_ok = test.sum(axis=1) == 1 

543 result[test_ok] = test[test_ok] 

544 return result 

545 

546 

547def group_rows(data, require_count=None, digits=None): 

548 """ 

549 Returns index groups of duplicate rows, for example: 

550 [[1,2], [3,4], [1,2]] will return [[0,2], [1]] 

551 

552 

553 Note that using require_count allows numpy advanced 

554 indexing to be used in place of looping and 

555 checking hashes and is ~10x faster. 

556 

557 

558 Parameters 

559 ---------- 

560 data : (n, m) array 

561 Data to group 

562 require_count : None or int 

563 Only return groups of a specified length, eg: 

564 require_count = 2 

565 [[1,2], [3,4], [1,2]] will return [[0,2]] 

566 digits : None or int 

567 If data is floating point how many decimals 

568 to consider, or calculated from tol.merge 

569 

570 Returns 

571 ---------- 

572 groups : sequence (*,) int 

573 Indices from in indicating identical rows. 

574 """ 

575 

576 # start with getting a sortable format 

577 hashable = hashable_rows(data, digits=digits) 

578 

579 # if there isn't a constant column size use more complex logic 

580 if require_count is None: 

581 return group(hashable) 

582 

583 # record the order of the rows so we can get the original indices back 

584 order = hashable.argsort() 

585 # but for now, we want our hashes sorted 

586 hashable = hashable[order] 

587 # this is checking each neighbour for equality, example: 

588 # example: hashable = [1, 1, 1]; dupe = [0, 0] 

589 dupe = hashable[1:] != hashable[:-1] 

590 # we want the first index of a group, so we can slice from that location 

591 # example: hashable = [0 1 1]; dupe = [1,0]; dupe_idx = [0,1] 

592 dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1) 

593 # if you wanted to use this one function to deal with non- regular groups 

594 # you could use: np.array_split(dupe_idx) 

595 # this is roughly 3x slower than using the group_dict method above. 

596 start_ok = np.diff(np.concatenate((dupe_idx, [len(hashable)]))) == require_count 

597 groups = np.tile(dupe_idx[start_ok].reshape((-1, 1)), require_count) + np.arange( 

598 require_count 

599 ) 

600 groups_idx = order[groups] 

601 

602 if require_count == 1: 

603 return groups_idx.reshape(-1) 

604 return groups_idx 

605 

606 

607def boolean_rows( 

608 a: ArrayLike, b: ArrayLike, operation=np.intersect1d 

609) -> NDArray[np.int64]: 

610 """ 

611 Find the rows in two arrays which occur in both rows. 

612 

613 Parameters 

614 --------- 

615 a: (n, d) int 

616 Array with row vectors 

617 b: (m, d) int 

618 Array with row vectors 

619 operation : function 

620 Numpy boolean set operation function: 

621 -np.intersect1d 

622 -np.setdiff1d 

623 

624 Returns 

625 -------- 

626 shared : (p, d) int64 

627 Array containing requested rows in both a and b 

628 """ 

629 a = np.asanyarray(a, dtype=np.int64) 

630 b = np.asanyarray(b, dtype=np.int64) 

631 

632 av = a.view([("", a.dtype)] * a.shape[1]).ravel() 

633 bv = b.view([("", b.dtype)] * b.shape[1]).ravel() 

634 return operation(av, bv).view(a.dtype).reshape(-1, a.shape[1]) 

635 

636 

637def group_vectors(vectors, angle=1e-4, include_negative=False): 

638 """ 

639 Group vectors based on an angle tolerance, with the option to 

640 include negative vectors. 

641 

642 Parameters 

643 ----------- 

644 vectors : (n,3) float 

645 Direction vector 

646 angle : float 

647 Group vectors closer than this angle in radians 

648 include_negative : bool 

649 If True consider the same: 

650 [0,0,1] and [0,0,-1] 

651 

652 Returns 

653 ------------ 

654 new_vectors : (m,3) float 

655 Direction vector 

656 groups : (m,) sequence of int 

657 Indices of source vectors 

658 """ 

659 

660 vectors = np.asanyarray(vectors, dtype=np.float64) 

661 angle = float(angle) 

662 

663 if include_negative: 

664 vectors = util.vector_hemisphere(vectors) 

665 

666 spherical = util.vector_to_spherical(vectors) 

667 angles, groups = group_distance(spherical, angle) 

668 new_vectors = util.spherical_to_vector(angles) 

669 return new_vectors, groups 

670 

671 

672def group_distance( 

673 values: ArrayLike, distance: Number 

674) -> tuple[NDArray[np.float64], Sequence]: 

675 """ 

676 Find non-overlapping groups of points where no two points in a 

677 group are farther than 2*distance apart. 

678 

679 Parameters 

680 --------- 

681 values : (n, d) float 

682 Points of dimension d 

683 distance : float 

684 Max distance between points in a cluster 

685 

686 Returns 

687 ---------- 

688 unique : (m, d) float 

689 Median value of each group 

690 groups : (m) sequence of int 

691 Indexes of points that make up a group 

692 

693 """ 

694 values = np.asanyarray(values, dtype=np.float64) 

695 

696 consumed = np.zeros(len(values), dtype=bool) 

697 tree = cKDTree(values) 

698 

699 # (n, d) set of values that are unique 

700 unique = [] 

701 # (n) sequence of indices in values 

702 groups = [] 

703 

704 for index, value in enumerate(values): 

705 if consumed[index]: 

706 continue 

707 group = np.array(tree.query_ball_point(value, distance), dtype=np.int64) 

708 group = group[~consumed[group]] 

709 consumed[group] = True 

710 unique.append(np.median(values[group], axis=0)) 

711 groups.append(group) 

712 return np.array(unique), groups 

713 

714 

715def clusters(points, radius): 

716 """ 

717 Find clusters of points which have neighbours closer than radius 

718 

719 Parameters 

720 --------- 

721 points : (n, d) float 

722 Points of dimension d 

723 radius : float 

724 Max distance between points in a cluster 

725 

726 Returns 

727 ---------- 

728 groups : (m,) sequence of int 

729 Indices of points in a cluster 

730 

731 """ 

732 from . import graph 

733 

734 tree = cKDTree(points) 

735 

736 # some versions return pairs as a set of tuples 

737 pairs = tree.query_pairs(r=radius, output_type="ndarray") 

738 # group connected components 

739 groups = graph.connected_components(pairs) 

740 

741 return groups 

742 

743 

744def blocks(data, min_len=2, max_len=np.inf, wrap=False, digits=None, only_nonzero=False): 

745 """ 

746 Find the indices in an array of contiguous blocks 

747 of equal values. 

748 

749 Parameters 

750 ------------ 

751 data : (n,) array 

752 Data to find blocks on 

753 min_len : int 

754 The minimum length group to be returned 

755 max_len : int 

756 The maximum length group to be retuurned 

757 wrap : bool 

758 Combine blocks on both ends of 1D array 

759 digits : None or int 

760 If dealing with floats how many digits to consider 

761 only_nonzero : bool 

762 Only return blocks of non- zero values 

763 

764 Returns 

765 --------- 

766 blocks : (m) sequence of (*,) int 

767 Indices referencing data 

768 """ 

769 data = float_to_int(data, digits=digits) 

770 

771 # keep an integer range around so we can slice 

772 arange = np.arange(len(data)) 

773 arange.flags["WRITEABLE"] = False 

774 

775 nonzero = arange[1:][data[1:] != data[:-1]] 

776 infl = np.zeros(len(nonzero) + 2, dtype=int) 

777 infl[-1] = len(data) 

778 infl[1:-1] = nonzero 

779 

780 # the length of each chunk 

781 infl_len = infl[1:] - infl[:-1] 

782 

783 # check the length of each group 

784 infl_ok = np.logical_and(infl_len >= min_len, infl_len <= max_len) 

785 

786 if only_nonzero: 

787 # check to make sure the values of each contiguous block 

788 # are True by checking the first value of each block 

789 infl_ok = np.logical_and(infl_ok, data[infl[:-1]]) 

790 

791 # inflate start/end indexes into full ranges of values 

792 blocks = [arange[infl[i] : infl[i + 1]] for i, ok in enumerate(infl_ok) if ok] 

793 

794 if wrap: 

795 # wrap only matters if first and last points are the same 

796 if data[0] != data[-1]: 

797 return blocks 

798 # if we are only grouping nonzero things and 

799 # the first and last point are zero we can exit 

800 if only_nonzero and not bool(data[0]): 

801 return blocks 

802 

803 # if all values are True or False we can exit 

804 if len(blocks) == 1 and len(blocks[0]) == len(data): 

805 return blocks 

806 

807 # so now first point equals last point, so the cases are: 

808 # - first and last point are in a block: combine two blocks 

809 # - first OR last point are in block: add other point to block 

810 # - neither are in a block: check if combined is eligible block 

811 

812 # first point is in a block 

813 first = len(blocks) > 0 and blocks[0][0] == 0 

814 # last point is in a block 

815 last = len(blocks) > 0 and blocks[-1][-1] == (len(data) - 1) 

816 

817 # CASE: first and last point are BOTH in block: combine blocks 

818 if first and last: 

819 blocks[0] = np.append(blocks[-1], blocks[0]) 

820 blocks.pop() 

821 else: 

822 # combined length 

823 combined = infl_len[0] + infl_len[-1] 

824 # exit if lengths aren't OK 

825 if combined < min_len or combined > max_len: 

826 return blocks 

827 # new block combines both ends 

828 new_block = np.append( 

829 np.arange(infl[-2], infl[-1]), np.arange(infl[0], infl[1]) 

830 ) 

831 # we are in a first OR last situation now 

832 if first: 

833 # first was already in a block so replace it with combined 

834 blocks[0] = new_block 

835 elif last: 

836 # last was already in a block so replace with superset 

837 blocks[-1] = new_block 

838 else: 

839 # both are false 

840 # combined length generated new block 

841 blocks.append(new_block) 

842 

843 return blocks 

844 

845 

846def group_min(groups, data): 

847 """ 

848 Given a list of groups find the minimum element of data 

849 within each group 

850 

851 Parameters 

852 ----------- 

853 groups : (n,) sequence of (q,) int 

854 Indexes of each group corresponding to each element in data 

855 data : (m,) 

856 The data that groups indexes reference 

857 

858 Returns 

859 ----------- 

860 minimums : (n,) 

861 Minimum value of data per group 

862 

863 """ 

864 # sort with major key groups, minor key data 

865 order = np.lexsort((data, groups)) 

866 groups = groups[order] # this is only needed if groups is unsorted 

867 data = data[order] 

868 # construct an index which marks borders between groups 

869 index = np.zeros(len(groups), "bool") 

870 index[0] = True 

871 index[1:] = groups[1:] != groups[:-1] 

872 return data[index]