Coverage for trimesh/grouping.py: 94%

1"""

2grouping.py

3-------------

5Functions for grouping values and rows.

6"""

8import numpy as np

10from . import util

11from .constants import log, tol

12from .typed import ArrayLike, Integer, NDArray, Number, Sequence

14try:

15 from scipy.spatial import cKDTree

16except BaseException as E:

17 # wrapping just ImportError fails in some cases

18 # will raise the error when someone tries to use KDtree

19 from . import exceptions

21 cKDTree = exceptions.ExceptionWrapper(E)

24def merge_vertices(

25 mesh,

26 merge_tex: bool | None = None,

27 merge_norm: bool | None = None,

28 digits_vertex: Integer | None = None,

29 digits_norm: Integer | None = None,

30 digits_uv: Integer | None = None,

31):

32 """

33 Removes duplicate vertices, grouped by position and

34 optionally texture coordinate and normal.

36 Parameters

37 -------------

38 mesh : Trimesh object

39 Mesh to merge vertices on

40 merge_tex : bool

41 If True textured meshes with UV coordinates will

42 have vertices merged regardless of UV coordinates

43 merge_norm : bool

44 If True, meshes with vertex normals will have

45 vertices merged ignoring different normals

46 digits_vertex : None or int

47 Number of digits to consider for vertex position

48 digits_norm : int

49 Number of digits to consider for unit normals

50 digits_uv : int

51 Number of digits to consider for UV coordinates

52 """

53 # no vertices so exit early

54 if len(mesh.vertices) == 0:

55 return

56 if merge_tex is None:

57 merge_tex = False

58 if merge_norm is None:

59 merge_norm = False

60 if digits_norm is None:

61 digits_norm = 2

62 if digits_uv is None:

63 digits_uv = 4

64 if digits_vertex is None:

65 # use tol.merge if digit precision not passed

66 digits_vertex = util.decimal_to_digits(tol.merge)

68 # if we have a ton of unreferenced vertices it will

69 # make the unique_rows call super slow so cull first

70 if hasattr(mesh, "faces") and len(mesh.faces) > 0:

71 referenced = np.zeros(len(mesh.vertices), dtype=bool)

72 referenced[mesh.faces] = True

73 else:

74 # this is used for geometry without faces

75 referenced = np.ones(len(mesh.vertices), dtype=bool)

77 # collect vertex attributes into sequence we can stack

78 stacked = [mesh.vertices * (10**digits_vertex)]

80 # UV texture visuals require us to update the

81 # vertices and normals differently

82 if (

83 not merge_tex

84 and mesh.visual.defined

85 and mesh.visual.kind == "texture"

86 and mesh.visual.uv is not None

87 and len(mesh.visual.uv) == len(mesh.vertices)

88 ):

89 # get an array with vertices and UV coordinates

90 # converted to integers at requested precision

91 stacked.append(mesh.visual.uv * (10**digits_uv))

93 # check to see if we have vertex normals

94 normals = mesh._cache["vertex_normals"]

95 if not merge_norm and np.shape(normals) == mesh.vertices.shape:

96 stacked.append(normals * (10**digits_norm))

98 # stack collected vertex properties and round to integer

99 stacked = np.column_stack(stacked).round().astype(np.int64)

100

101 # check unique rows of referenced vertices

102 u, i = unique_rows(stacked[referenced], keep_order=True)

103

104 # construct an inverse using the subset

105 inverse = np.zeros(len(mesh.vertices), dtype=np.int64)

106 inverse[referenced] = i

107 # get the vertex mask

108 mask = np.nonzero(referenced)[0][u]

109 # run the update including normals and UV coordinates

110 mesh.update_vertices(mask=mask, inverse=inverse)

111

112

113def group(values, min_len: Integer | None = None, max_len: Integer | None = None):

114 """

115 Return the indices of values that are identical

116

117 Parameters

118 ----------

119 values : (n,) int

120 Values to group

121 min_len : int

122 The shortest group allowed

123 All groups will have len >= min_length

124 max_len : int

125 The longest group allowed

126 All groups will have len <= max_length

127

128 Returns

129 ----------

130 groups : sequence

131 Contains indices to form groups

132 IE [0,1,0,1] returns [[0,2], [1,3]]

133 """

134 original = np.asanyarray(values)

135

136 # save the sorted order and then apply it

137 order = original.argsort()

138 values = original[order]

139

140 # find the indexes which are duplicates

141 if values.dtype.kind == "f":

142 # for floats in a sorted array, neighbors are not duplicates

143 # if the difference between them is greater than approximate zero

144 nondupe = np.greater(np.abs(np.diff(values)), tol.zero)

145 else:

146 # for ints and strings we can check exact non- equality

147 # for all other types this will only work if they defined

148 # an __eq__

149 nondupe = values[1:] != values[:-1]

150

151 dupe_idx = np.append(0, np.nonzero(nondupe)[0] + 1)

152

153 # start with a mask that marks everything as ok

154 dupe_ok = np.ones(len(dupe_idx), dtype=bool)

155

156 # calculate the length of each group from their index

157 dupe_len = np.diff(np.concatenate((dupe_idx, [len(values)])))

158

159 # cull by length if requested

160 if min_len is not None or max_len is not None:

161 if min_len is not None:

162 dupe_ok &= dupe_len >= min_len

163 if max_len is not None:

164 dupe_ok &= dupe_len <= max_len

165

166 groups = [order[i : (i + j)] for i, j in zip(dupe_idx[dupe_ok], dupe_len[dupe_ok])]

167 return groups

168

169

170def hashable_rows(

171 data: ArrayLike, digits: Integer | None = None, allow_int: bool = True

172) -> NDArray:

173 """

174 We turn our array into integers based on the precision

175 given by digits and then put them in a hashable format.

176

177 Parameters

178 ---------

179 data : (n, m) array

180 Input data

181 digits : int or None

182 How many digits to add to hash if data is floating point

183 If None, tol.merge will be used

184

185 Returns

186 ---------

187 hashable : (n,)

188 May return as a `np.void` or a `np.uint64`

189 """

190 # if there is no data return immediately

191 if len(data) == 0:

192 return np.array([], dtype=np.uint64)

193

194 # get array as integer to precision we care about

195 as_int = float_to_int(data, digits=digits)

196

197 # if it is flat integers already return

198 if len(as_int.shape) == 1:

199 return as_int

200

201 # if array is 2D and smallish, we can try bitbanging

202 # this is significantly faster than the custom dtype

203 if allow_int and len(as_int.shape) == 2 and as_int.shape[1] <= 4:

204 # can we pack the whole row into a single 64 bit integer

205 precision = int(np.floor(64 / as_int.shape[1]))

206

207 # get the extreme values of the data set

208 d_min, d_max = as_int.min(), as_int.max()

209 # since we are quantizing the data down we need every value

210 # to fit in a partial integer so we have to check against extrema

211 threshold = (2 ** (precision - 1)) - 1

212

213 # if the data is within the range of our precision threshold

214 if d_max < threshold and d_min > -threshold:

215 # the resulting package

216 hashable = np.zeros(len(as_int), dtype=np.uint64)

217 # offset to the middle of the unsigned integer range

218 # this array should contain only positive values

219 bitbang = (as_int.T + (threshold + 1)).astype(np.uint64)

220 # loop through each column and bitwise xor to combine

221 # make sure as_int is int64 otherwise bit offset won't work

222 for offset, column in enumerate(bitbang):

223 # will modify hashable in place

224 np.bitwise_xor(hashable, column << (offset * precision), out=hashable)

225 return hashable

226

227 # reshape array into magical data type that is weird but works with unique

228 dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1]))

229 # make sure result is contiguous and flat

230 result = np.ascontiguousarray(as_int).view(dtype).reshape(-1)

231 result.flags["WRITEABLE"] = False

232

233 return result

234

235

236def float_to_int(data, digits: Integer | None = None) -> NDArray[np.int64]:

237 """

238 Given a numpy array of float/bool/int, return as integers.

239

240 Parameters

241 -------------

242 data : (n, d) float, int, or bool

243 Input data

244 digits : float or int

245 Precision for float conversion

246

247 Returns

248 -------------

249 as_int : (n, d) int

250 Data as integers

251 """

252 # convert to any numpy array

253 data = np.asanyarray(data)

254

255 # we can early-exit if we've been passed data that is already

256 # an integer, unsigned integer, boolean, or empty

257 if data.dtype == np.int64:

258 return data

259 elif data.dtype.kind in "iub" or data.size == 0:

260 return data.astype(np.int64)

261 elif data.dtype.kind != "f":

262 # if it's not a floating point try to make it one

263 data = data.astype(np.float64)

264

265 if digits is None:

266 # get digits from `tol.merge`

267 digits = util.decimal_to_digits(tol.merge)

268 elif not isinstance(digits, (int, np.integer)):

269 raise TypeError(f"Digits must be `None` or `int`, not `{type(digits)}`")

270

271 # multiply by requested power of ten

272 # then subtract small epsilon to avoid "go either way" rounding

273 # then do the rounding and convert to integer

274 return np.round((data * 10**digits) - 1e-6).astype(np.int64)

275

276

277def unique_ordered(

278 data: ArrayLike, return_index: bool = False, return_inverse: bool = False

279):

280 """

281 Returns the same as np.unique, but ordered as per the

282 first occurrence of the unique value in data.

283

284 Examples

285 ---------

286 In [1]: a = [0, 3, 3, 4, 1, 3, 0, 3, 2, 1]

287

288 In [2]: np.unique(a)

289 Out[2]: array([0, 1, 2, 3, 4])

290

291 In [3]: trimesh.grouping.unique_ordered(a)

292 Out[3]: array([0, 3, 4, 1, 2])

293 """

294 # uniques are the values, sorted

295 # index is the value in the original `data`

296 # i.e. `data[index] == unique`

297 # inverse is how to re-construct `data` from `unique`

298 # i.e. `unique[inverse] == data`

299 unique, index, inverse = np.unique(data, return_index=True, return_inverse=True)

300

301 # we want to maintain the original index order

302 order = index.argsort()

303

304 if not return_index and not return_inverse:

305 return unique[order]

306

307 # collect return values

308 # start with the unique values in original order

309 result = [unique[order]]

310 # the new index values

311 if return_index:

312 # re-order the index in the original array

313 result.append(index[order])

314 if return_inverse:

315 # create the new inverse from the order of the order

316 result.append(order.argsort()[inverse])

317

318 return result

319

320

321def unique_bincount(

322 values: ArrayLike,

323 minlength: Integer = 0,

324 return_inverse: bool = False,

325 return_counts: bool = False,

326):

327 """

328 For arrays of integers find unique values using bin counting.

329 Roughly 10x faster for correct input than np.unique

330

331 Parameters

332 --------------

333 values : (n,) int

334 Values to find unique members of

335 minlength : int

336 Maximum value that will occur in values (values.max())

337 return_inverse : bool

338 If True, return an inverse such that unique[inverse] == values

339 return_counts : bool

340 If True, also return the number of times each

341 unique item appears in values

342

343 Returns

344 ------------

345 unique : (m,) int

346 Unique values in original array

347 inverse : (n,) int, optional

348 An array such that unique[inverse] == values

349 Only returned if return_inverse is True

350 counts : (m,) int, optional

351 An array holding the counts of each unique item in values

352 Only returned if return_counts is True

353 """

354 values = np.asanyarray(values)

355 if len(values.shape) != 1 or values.dtype.kind != "i":

356 raise ValueError("input must be 1D integers!")

357

358 try:

359 # count the number of occurrences of each value

360 counts = np.bincount(values, minlength=minlength)

361 except TypeError:

362 # casting failed on 32 bit windows

363 log.warning("casting failed, falling back!")

364 # fall back to numpy unique

365 return np.unique(

366 values, return_inverse=return_inverse, return_counts=return_counts

367 )

368

369 # which bins are occupied at all

370 # counts are integers so this works

371 unique_bin = counts.astype(bool)

372

373 # which values are unique

374 # indexes correspond to original values

375 unique = np.where(unique_bin)[0]

376 ret = (unique,)

377

378 if return_inverse:

379 # find the inverse to reconstruct original

380 inverse = (np.cumsum(unique_bin) - 1)[values]

381 ret += (inverse,)

382

383 if return_counts:

384 unique_counts = counts[unique]

385 ret += (unique_counts,)

386

387 if len(ret) == 1:

388 return ret[0]

389 return ret

390

391

392def merge_runs(data: ArrayLike, digits: Integer | None = None):

393 """

394 Merge duplicate sequential values. This differs from unique_ordered

395 in that values can occur in multiple places in the sequence, but

396 only consecutive repeats are removed

397

398 Parameters

399 -----------

400 data: (n,) float or int

401

402 Returns

403 --------

404 merged: (m,) float or int

405

406 Examples

407 ---------

408 In [1]: a

409 Out[1]:

410 array([-1, -1, -1, 0, 0, 1, 1, 2, 0,

411 3, 3, 4, 4, 5, 5, 6, 6, 7,

412 7, 8, 8, 9, 9, 9])

413

414 In [2]: trimesh.grouping.merge_runs(a)

415 Out[2]: array([-1, 0, 1, 2, 0, 3, 4, 5, 6, 7, 8, 9])

416 """

417 if digits is None:

418 epsilon = tol.merge

419 else:

420 epsilon = 10 ** (-digits)

421

422 data = np.asanyarray(data)

423 mask = np.zeros(len(data), dtype=bool)

424 mask[0] = True

425 mask[1:] = np.abs(data[1:] - data[:-1]) > epsilon

426

427 return data[mask]

428

429

430def unique_float(

431 data,

432 return_index: bool = False,

433 return_inverse: bool = False,

434 digits: Integer | None = None,

435):

436 """

437 Identical to the numpy.unique command, except evaluates floating point

438 numbers, using a specified number of digits.

439

440 If digits isn't specified, the library default TOL_MERGE will be used.

441 """

442 data = np.asanyarray(data)

443 as_int = float_to_int(data, digits)

444 _junk, unique, inverse = np.unique(as_int, return_index=True, return_inverse=True)

445

446 if (not return_index) and (not return_inverse):

447 return data[unique]

448

449 result = [data[unique]]

450

451 if return_index:

452 result.append(unique)

453 if return_inverse:

454 result.append(inverse)

455 return tuple(result)

456

457

458def unique_rows(data, digits=None, keep_order=False):

459 """

460 Returns indices of unique rows. It will return the

461 first occurrence of a row that is duplicated:

462 [[1,2], [3,4], [1,2]] will return [0,1]

463

464 Parameters

465 ---------

466 data : (n, m) array

467 Floating point data

468 digits : int or None

469 How many digits to consider

470

471 Returns

472 --------

473 unique : (j,) int

474 Index in data which is a unique row

475 inverse : (n,) int

476 Array to reconstruct original

477 Example: data[unique][inverse] == data

478 """

479 # get rows hashable so we can run unique function on it

480 rows = hashable_rows(data, digits=digits)

481

482 # we are throwing away the first value which is the

483 # garbage row-hash and only returning index and inverse

484 if keep_order:

485 # keeps order of original occurrence

486 return unique_ordered(rows, return_index=True, return_inverse=True)[1:]

487 # returns values sorted by row-hash but since our row-hash

488 # were pretty much garbage the sort order isn't meaningful

489 return np.unique(rows, return_index=True, return_inverse=True)[1:]

490

491

492def unique_value_in_row(data, unique=None):

493 """

494 For a 2D array of integers find the position of a

495 value in each row which only occurs once.

496

497 If there are more than one value per row which

498 occur once, the last one is returned.

499

500 Parameters

501 ----------

502 data : (n, d) int

503 Data to check values

504 unique : (m,) int

505 List of unique values contained in data.

506 Generated from np.unique if not passed

507

508 Returns

509 ---------

510 result : (n, d) bool

511 With one or zero True values per row.

512

513

514 Examples

515 -------------------------------------

516 In [0]: r = np.array([[-1, 1, 1],

517 [-1, 1, -1],

518 [-1, 1, 1],

519 [-1, 1, -1],

520 [-1, 1, -1]], dtype=np.int8)

521

522 In [1]: unique_value_in_row(r)

523 Out[1]:

524 array([[ True, False, False],

525 [False, True, False],

526 [ True, False, False],

527 [False, True, False],

528 [False, True, False]], dtype=bool)

529

530 In [2]: unique_value_in_row(r).sum(axis=1)

531 Out[2]: array([1, 1, 1, 1, 1])

532

533 In [3]: r[unique_value_in_row(r)]

534 Out[3]: array([-1, 1, -1, 1, 1], dtype=int8)

535 """

536 if unique is None:

537 unique = np.unique(data)

538 data = np.asanyarray(data)

539 result = np.zeros_like(data, dtype=bool, subok=False)

540 for value in unique:

541 test = np.equal(data, value)

542 test_ok = test.sum(axis=1) == 1

543 result[test_ok] = test[test_ok]

544 return result

545

546

547def group_rows(data, require_count=None, digits=None):

548 """

549 Returns index groups of duplicate rows, for example:

550 [[1,2], [3,4], [1,2]] will return [[0,2], [1]]

551

552

553 Note that using require_count allows numpy advanced

554 indexing to be used in place of looping and

555 checking hashes and is ~10x faster.

556

557

558 Parameters

559 ----------

560 data : (n, m) array

561 Data to group

562 require_count : None or int

563 Only return groups of a specified length, eg:

564 require_count = 2

565 [[1,2], [3,4], [1,2]] will return [[0,2]]

566 digits : None or int

567 If data is floating point how many decimals

568 to consider, or calculated from tol.merge

569

570 Returns

571 ----------

572 groups : sequence (*,) int

573 Indices from in indicating identical rows.

574 """

575

576 # start with getting a sortable format

577 hashable = hashable_rows(data, digits=digits)

578

579 # if there isn't a constant column size use more complex logic

580 if require_count is None:

581 return group(hashable)

582

583 # record the order of the rows so we can get the original indices back

584 order = hashable.argsort()

585 # but for now, we want our hashes sorted

586 hashable = hashable[order]

587 # this is checking each neighbour for equality, example:

588 # example: hashable = [1, 1, 1]; dupe = [0, 0]

589 dupe = hashable[1:] != hashable[:-1]

590 # we want the first index of a group, so we can slice from that location

591 # example: hashable = [0 1 1]; dupe = [1,0]; dupe_idx = [0,1]

592 dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1)

593 # if you wanted to use this one function to deal with non- regular groups

594 # you could use: np.array_split(dupe_idx)

595 # this is roughly 3x slower than using the group_dict method above.

596 start_ok = np.diff(np.concatenate((dupe_idx, [len(hashable)]))) == require_count

597 groups = np.tile(dupe_idx[start_ok].reshape((-1, 1)), require_count) + np.arange(

598 require_count

599 )

600 groups_idx = order[groups]

601

602 if require_count == 1:

603 return groups_idx.reshape(-1)

604 return groups_idx

605

606

607def boolean_rows(

608 a: ArrayLike, b: ArrayLike, operation=np.intersect1d

609) -> NDArray[np.int64]:

610 """

611 Find the rows in two arrays which occur in both rows.

612

613 Parameters

614 ---------

615 a: (n, d) int

616 Array with row vectors

617 b: (m, d) int

618 Array with row vectors

619 operation : function

620 Numpy boolean set operation function:

621 -np.intersect1d

622 -np.setdiff1d

623

624 Returns

625 --------

626 shared : (p, d) int64

627 Array containing requested rows in both a and b

628 """

629 a = np.asanyarray(a, dtype=np.int64)

630 b = np.asanyarray(b, dtype=np.int64)

631

632 av = a.view([("", a.dtype)] * a.shape[1]).ravel()

633 bv = b.view([("", b.dtype)] * b.shape[1]).ravel()

634 return operation(av, bv).view(a.dtype).reshape(-1, a.shape[1])

635

636

637def group_vectors(vectors, angle=1e-4, include_negative=False):

638 """

639 Group vectors based on an angle tolerance, with the option to

640 include negative vectors.

641

642 Parameters

643 -----------

644 vectors : (n,3) float

645 Direction vector

646 angle : float

647 Group vectors closer than this angle in radians

648 include_negative : bool

649 If True consider the same:

650 [0,0,1] and [0,0,-1]

651

652 Returns

653 ------------

654 new_vectors : (m,3) float

655 Direction vector

656 groups : (m,) sequence of int

657 Indices of source vectors

658 """

659

660 vectors = np.asanyarray(vectors, dtype=np.float64)

661 angle = float(angle)

662

663 if include_negative:

664 vectors = util.vector_hemisphere(vectors)

665

666 spherical = util.vector_to_spherical(vectors)

667 angles, groups = group_distance(spherical, angle)

668 new_vectors = util.spherical_to_vector(angles)

669 return new_vectors, groups

670

671

672def group_distance(

673 values: ArrayLike, distance: Number

674) -> tuple[NDArray[np.float64], Sequence]:

675 """

676 Find non-overlapping groups of points where no two points in a

677 group are farther than 2*distance apart.

678

679 Parameters

680 ---------

681 values : (n, d) float

682 Points of dimension d

683 distance : float

684 Max distance between points in a cluster

685

686 Returns

687 ----------

688 unique : (m, d) float

689 Median value of each group

690 groups : (m) sequence of int

691 Indexes of points that make up a group

692

693 """

694 values = np.asanyarray(values, dtype=np.float64)

695

696 consumed = np.zeros(len(values), dtype=bool)

697 tree = cKDTree(values)

698

699 # (n, d) set of values that are unique

700 unique = []

701 # (n) sequence of indices in values

702 groups = []

703

704 for index, value in enumerate(values):

705 if consumed[index]:

706 continue

707 group = np.array(tree.query_ball_point(value, distance), dtype=np.int64)

708 group = group[~consumed[group]]

709 consumed[group] = True

710 unique.append(np.median(values[group], axis=0))

711 groups.append(group)

712 return np.array(unique), groups

713

714

715def clusters(points, radius):

716 """

717 Find clusters of points which have neighbours closer than radius

718

719 Parameters

720 ---------

721 points : (n, d) float

722 Points of dimension d

723 radius : float

724 Max distance between points in a cluster

725

726 Returns

727 ----------

728 groups : (m,) sequence of int

729 Indices of points in a cluster

730

731 """

732 from . import graph

733

734 tree = cKDTree(points)

735

736 # some versions return pairs as a set of tuples

737 pairs = tree.query_pairs(r=radius, output_type="ndarray")

738 # group connected components

739 groups = graph.connected_components(pairs)

740

741 return groups

742

743

744def blocks(data, min_len=2, max_len=np.inf, wrap=False, digits=None, only_nonzero=False):

745 """

746 Find the indices in an array of contiguous blocks

747 of equal values.

748

749 Parameters

750 ------------

751 data : (n,) array

752 Data to find blocks on

753 min_len : int

754 The minimum length group to be returned

755 max_len : int

756 The maximum length group to be retuurned

757 wrap : bool

758 Combine blocks on both ends of 1D array

759 digits : None or int

760 If dealing with floats how many digits to consider

761 only_nonzero : bool

762 Only return blocks of non- zero values

763

764 Returns

765 ---------

766 blocks : (m) sequence of (*,) int

767 Indices referencing data

768 """

769 data = float_to_int(data, digits=digits)

770

771 # keep an integer range around so we can slice

772 arange = np.arange(len(data))

773 arange.flags["WRITEABLE"] = False

774

775 nonzero = arange[1:][data[1:] != data[:-1]]

776 infl = np.zeros(len(nonzero) + 2, dtype=int)

777 infl[-1] = len(data)

778 infl[1:-1] = nonzero

779

780 # the length of each chunk

781 infl_len = infl[1:] - infl[:-1]

782

783 # check the length of each group

784 infl_ok = np.logical_and(infl_len >= min_len, infl_len <= max_len)

785

786 if only_nonzero:

787 # check to make sure the values of each contiguous block

788 # are True by checking the first value of each block

789 infl_ok = np.logical_and(infl_ok, data[infl[:-1]])

790

791 # inflate start/end indexes into full ranges of values

792 blocks = [arange[infl[i] : infl[i + 1]] for i, ok in enumerate(infl_ok) if ok]

793

794 if wrap:

795 # wrap only matters if first and last points are the same

796 if data[0] != data[-1]:

797 return blocks

798 # if we are only grouping nonzero things and

799 # the first and last point are zero we can exit

800 if only_nonzero and not bool(data[0]):

801 return blocks

802

803 # if all values are True or False we can exit

804 if len(blocks) == 1 and len(blocks[0]) == len(data):

805 return blocks

806

807 # so now first point equals last point, so the cases are:

808 # - first and last point are in a block: combine two blocks

809 # - first OR last point are in block: add other point to block

810 # - neither are in a block: check if combined is eligible block

811

812 # first point is in a block

813 first = len(blocks) > 0 and blocks[0][0] == 0

814 # last point is in a block

815 last = len(blocks) > 0 and blocks[-1][-1] == (len(data) - 1)

816

817 # CASE: first and last point are BOTH in block: combine blocks

818 if first and last:

819 blocks[0] = np.append(blocks[-1], blocks[0])

820 blocks.pop()

821 else:

822 # combined length

823 combined = infl_len[0] + infl_len[-1]

824 # exit if lengths aren't OK

825 if combined < min_len or combined > max_len:

826 return blocks

827 # new block combines both ends

828 new_block = np.append(

829 np.arange(infl[-2], infl[-1]), np.arange(infl[0], infl[1])

830 )

831 # we are in a first OR last situation now

832 if first:

833 # first was already in a block so replace it with combined

834 blocks[0] = new_block

835 elif last:

836 # last was already in a block so replace with superset

837 blocks[-1] = new_block

838 else:

839 # both are false

840 # combined length generated new block

841 blocks.append(new_block)

842

843 return blocks

844

845

846def group_min(groups, data):

847 """

848 Given a list of groups find the minimum element of data

849 within each group

850

851 Parameters

852 -----------

853 groups : (n,) sequence of (q,) int

854 Indexes of each group corresponding to each element in data

855 data : (m,)

856 The data that groups indexes reference

857

858 Returns

859 -----------

860 minimums : (n,)

861 Minimum value of data per group

862

863 """

864 # sort with major key groups, minor key data

865 order = np.lexsort((data, groups))

866 groups = groups[order] # this is only needed if groups is unsorted

867 data = data[order]

868 # construct an index which marks borders between groups

869 index = np.zeros(len(groups), "bool")

870 index[0] = True

871 index[1:] = groups[1:] != groups[:-1]

872 return data[index]