Welcome to the NetCologne GmbH open source mirroring service!

This machine mirrors various open-source projects. 20 Gbit/s uplink.

If there are any issues or you want another project mirrored, please contact mirror-service -=AT=- netcologne DOT de !

GetFEM: src/bgeot_sparse_tensors.cc Source File
GetFEM  5.5
bgeot_sparse_tensors.cc
1 /*===========================================================================
2 
3  Copyright (C) 2000-2026 Julien Pommier
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program. If not, see https://www.gnu.org/licenses/.
18 
19 ===========================================================================*/
20 
21 #include <bitset>
22 #include "gmm/gmm_blas_interface.h"
24 
25 
26 namespace bgeot {
27  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r) {
28  for (size_type i=0; i < r.size(); ++i) {
29  if (i) o << "x";
30  o << "[0.." << r[i] << "]";
31  }
32  return o;
33  }
34 
35  /*
36  strides:
37  0 1 2 3 4 5 6 7
38  ---------------
39  x x x
40  x x x
41  x x x x x x
42  x x x x x
43 
44  => n={0,4,6,8,8}
45 
46  basic iterator on a set of full tensors, just used for iterating over binary masks
47  */
48  template<typename IT> class basic_multi_iterator {
49  unsigned N;
50  index_set idxnums;
51  tensor_ranges ranges;
52  tensor_strides strides;
53  tensor_ranges cnt;
54  index_set ilst2idxnums;
55  std::vector<const tensor_strides*> slst;
56  std::vector<IT> iter;
57  std::vector<int> n;
58  public:
59  const tensor_ranges& getcnt() const { return cnt; }
60  basic_multi_iterator() {
61  N = 0;
62  idxnums.reserve(16);
63  strides.reserve(64);
64  slst.reserve(16);
65  ilst2idxnums.reserve(64); iter.reserve(4);
66  }
67  const tensor_ranges& all_ranges() const { return ranges; }
68  const index_set& all_indexes() const { return idxnums; }
69  /* /!!!!!\ attention les strides ont 1 elt de plus que r et idxs (pour les tensor_masks)
70  (ca intervient aussi dans prepare()) */
71  void insert(const index_set& idxs, const tensor_ranges& r, const tensor_strides& s, IT it_) {
72  assert(idxs.size() == r.size()); assert(s.size() == r.size()+1);
73  slst.push_back(&s);
74  for (unsigned int i=0; i < idxs.size(); ++i) {
75  index_set::const_iterator f=std::find(idxnums.begin(), idxnums.end(), idxs[i]);
76  if (f == idxnums.end()) {
77  ilst2idxnums.push_back(dim_type(idxnums.size()));
78  idxnums.push_back(idxs[i]);
79  ranges.push_back(r[i]);
80  } else {
81  ilst2idxnums.push_back(dim_type(f-idxnums.begin()));
82  assert(ranges[ilst2idxnums.back()] == r[i]);
83  }
84  }
85  iter.push_back(it_);
86  N++;
87  }
88  void prepare() {
89  unsigned c = 0;
90  strides.assign(N*idxnums.size(),0);
91  for (unsigned int i=0; i < N; ++i) {
92  for (unsigned int j=0; j < slst[i]->size()-1; ++j) {
93  stride_type s = (*slst[i])[j];
94  strides[ilst2idxnums[c]*N + i] = s;
95  c++;
96  }
97  }
98  n.assign(N+1,-1);
99  for (unsigned int i=0; i < idxnums.size(); ++i) {
100  for (unsigned int j=0; j < N; ++j)
101  if (strides[i*N+j])
102  n[j+1] = i;
103  }
104  cnt.assign(idxnums.size(),0);
105  }
106  /* unfortunatly these two function don't inline
107  and are not optimized by the compiler
108  (when b and N are known at compile-time) (at least for gcc-3.2) */
109  bool next(unsigned int b) { return next(b,N); }
110  bool next(unsigned int b, unsigned int NN) {
111  int i0 = n[b+1];
112  while (i0 > n[b]) {
113  if (++cnt[i0] < ranges[i0]) {
114  for (unsigned int i=b; i < NN; ++i)
115  iter[i] += strides[i0*NN+i];
116  return true;
117  } else {
118  for (unsigned int i=b; i < NN; ++i)
119  iter[i] -= strides[i0*NN+i]*(ranges[i0]-1);
120  cnt[i0]=0;
121  i0--;
122  }
123  }
124  return false;
125  }
126  template<unsigned b, unsigned NN> bool qnext() { return next(b,NN); }
127  IT it(unsigned int b) const { return iter[b]; }
128  };
129 
130 
131  /*
132  constructeur par fusion de deux masques binaires
133  (avec potentiellement une intersection non vide)
134  */
135  tensor_mask::tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
136  assign(tm1,tm2,and_op); unset_card();
137  }
138 #if 0
139  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
140  clear(); unset_card();
141  if (tm1.ndim()==0) { assign(tm2); return; }
142  if (tm2.ndim()==0) { assign(tm1); return; }
143  r = tm1.ranges();
144  idxs = tm1.indexes();
145 
146  /* ce tableau va permettre de faire une boucle commune sur les
147  deux masques, les dimension inutilisees etant mises a 1 pour
148  ne pas gener */
149  tensor_ranges global_r(std::max(tm1.max_dim(),tm2.max_dim())+1, index_type(1));
150 
151  for (index_type i = 0; i < tm1.indexes().size(); ++i)
152  global_r[tm1.indexes()[i]] = tm1.ranges()[i];
153 
154  /* detect new indices */
155  for (index_type i = 0; i < tm2.indexes().size(); ++i) {
156  index_set::const_iterator f=std::find(tm1.indexes().begin(), tm1.indexes().end(), tm2.indexes()[i]);
157  if (f == tm1.indexes().end()) {
158  assert(global_r[tm2.indexes()[i]]==1);
159  global_r[tm2.indexes()[i]] = tm2.ranges()[i];
160  r.push_back(tm2.ranges()[i]);
161  idxs.push_back(tm2.indexes()[i]);
162  }
163  else assert(global_r[tm2.indexes()[i]] = tm2.ranges()[i]);
164  }
165  eval_strides();
166  assert(size());
167  m.assign(size(),false);
168  /* sans doute pas tres optimal, mais la fonction n'est pas critique .. */
169  for (tensor_ranges_loop l(global_r); !l.finished(); l.next()) {
170  if (and_op) {
171  if (tm1(l.cnt) && tm2(l.cnt))
172  m.add(pos(l.cnt));
173  } else {
174  if (tm1(l.cnt) || tm2(l.cnt))
175  m.add(pos(l.cnt));
176  }
177  }
178  unset_card();
179  }
180 #endif
181 
182 
183  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
184  clear(); unset_card();
185  /* check simple cases first, since this function can be called often and
186  is quite expensive */
187  if (tm1.ndim()==0) { assign(tm2); return; }
188  if (tm2.ndim()==0) { assign(tm1); return; }
189 
190  /* same masks ? */
191  if (tm1.indexes() == tm2.indexes() &&
192  tm1.ranges() == tm2.ranges() &&
193  tm1.strides() == tm2.strides()) {
194  r = tm1.ranges(); idxs=tm1.indexes(); s=tm1.strides();
195  assert(tm1.m.size() == tm2.m.size());
196  m = tm1.m;
197  if (and_op) {
198  for (index_type i=0; i < tm2.m.size(); ++i)
199  if (!tm2.m[i])
200  m[i] = false;
201  } else {
202  for (index_type i=0; i < tm2.m.size(); ++i)
203  if (tm2.m[i])
204  m[i] = true;
205  }
206  return;
207  }
208 
209  basic_multi_iterator<unsigned> bmit;
210  bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);
211  bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);
212  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
213  assert(size());
214  m.assign(size(),false);
215  bmit.insert(indexes(), ranges(), strides(), 0);
216  bmit.prepare();
217  //cout << "tm1=" << tm1 << "\ntm2=" << tm2 << endl;
218  if (and_op) {
219  do {
220  if (tm1.m[bmit.it(0)]) {
221  do {
222  if (tm2.m[bmit.it(1)])
223  m[bmit.it(2)] = 1;
224  // cout << "at cnt=" << bmit.getcnt() << ", it0=" << bmit.it(0) << "=" << tm1.m[bmit.it(0)]
225  // << ", it1=" << bmit.it(1) << "=" << tm2.m[bmit.it(1)] << ", res=" << bmit.it(2) << "=" << m[bmit.it(2)] << endl;
226  } while (bmit.qnext<1,3>());
227  }
228  } while (bmit.qnext<0,3>());
229  } else {
230  do {
231  bool v1 = tm1.m[bmit.it(0)];
232  do {
233  if (v1 || tm2.m[bmit.it(1)])
234  m[bmit.it(2)] = 1;
235  } while (bmit.qnext<1,3>());
236  } while (bmit.qnext<0,3>());
237  }
238  // cout << "output: " << *this << endl;
239  }
240 
241 
242  /* construit un masque forme du produit cartesien de plusieurs masques (disjoints)
243  /!\ aucune verif sur la validite des arguments */
244  tensor_mask::tensor_mask(const std::vector<const tensor_mask*>& tm) {
245  assign(tm);
246  }
247 #if 0
248  // REMPLACE PAR LA FONCTION SUIVANTE
249  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
250  index_set mask_start; unset_card();
251  clear();
252  r.reserve(255); idxs.reserve(255); mask_start.reserve(255);
253  for (dim_type i=0; i < tm.size(); ++i) {
254  mask_start.push_back(r.size());
255  for (dim_type j=0; j < tm[i]->ndim(); ++j) {
256  r.push_back(tm[i]->ranges()[j]);
257  idxs.push_back(tm[i]->indexes()[j]);
258  }
259  }
260  eval_strides(); assert(size());
261  m.assign(size(),false);
262  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
263  bool is_in = true;
264  for (dim_type i=0; is_in && i < tm.size(); ++i) {
265  index_type s_ = 0;
266  for (dim_type j=0; j < tm[i]->ndim(); ++j)
267  s_ += l.cnt[j+mask_start[i]]*tm[i]->strides()[j];
268  if (!tm[i]->m[s_])
269  is_in = false;
270  }
271  if (is_in) m.add(lpos(l.cnt));
272  }
273  }
274 #endif
275  // PRODUIT CARTESIEN DE MASQUES BINAIRES DISJOINTS
276  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
277  unset_card();
278  if (tm.size() == 0) { clear(); return; }
279  if (tm.size() == 1) { assign(*tm[0]); return; }
280  clear();
281  basic_multi_iterator<unsigned> bmit;
282  for (dim_type i=0; i < tm.size(); ++i)
283  bmit.insert(tm[i]->indexes(), tm[i]->ranges(), tm[i]->strides(), 0);
284  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
285  assert(size());
286  m.assign(size(), false);
287  bmit.insert(indexes(), ranges(), strides(), 0);
288  bmit.prepare();
289  unsigned N = unsigned(tm.size());
290  bool finished = false;
291  while (!finished) {
292  bool is_in = true;
293  unsigned int b;
294  for (b=0; b < N; ++b) {
295  if (!tm[b]->m[bmit.it(b)]) {
296  is_in = false;
297  break;
298  }
299  }
300  if (is_in) { m[bmit.it(N)] = 1; b = N-1; }
301  while (!finished && !bmit.next(b)) { if (b == 0) finished = true; b--; }
302  }
303  }
304 
305  void tensor_mask::unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const {
306  if (packed.size() != card())
307  assert(packed.size()==card());
308  unpacked.assign(size(),INT_MIN);
309  index_type i=0;
310  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
311  if (m[lpos(l.cnt)]) unpacked[lpos(l.cnt)] = packed[i++];
312  }
313  }
314 
315  void tensor_mask::check_assertions() const {
316  GMM_ASSERT3(r.size() == idxs.size(), "");
317  GMM_ASSERT3(s.size() == idxs.size()+1, "");
318  GMM_ASSERT3(m.size() == size(), "");
319  dal::bit_vector bv;
320  for (dim_type i=0; i < idxs.size(); ++i) {
321  GMM_ASSERT3(!bv.is_in(i), "");
322  bv.add(i);
323  }
324  }
325 
326  tensor_mask::tensor_mask(const std::vector<const tensor_mask*> tm1, const std::vector<const tensor_mask*> tm2, bool and_op) {
327  assign(tensor_mask(tm1), tensor_mask(tm2), and_op);
328  }
329 
330  void tensor_ref::set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub) {
331  assign_shape(sub);
332  /* fusionne sub et tr.shape */
333  merge(tr);
334 
335  /* reserve l'espace pour les strides */
336  strides_.resize(masks().size());
337  for (dim_type i = 0; i < strides_.size(); ++i)
338  strides_[i].resize(mask(i).card());
339 
340  pbase_ = tr.pbase_; base_shift_ = tr.base_shift();
341 
342  /*
343  cout << "\n -> entree dans set_sub_tensor: " << endl
344  << "tr.shape=" << (tensor_shape&)(tr) << endl
345  << " sub=" << sub << endl;
346  */
347  /* pre-calcul rapide des strides sur tr, pour chacun de ses masques */
348  std::vector<tensor_strides> trstrides_unpacked(tr.masks().size());
349  for (dim_type im = 0; im < tr.masks().size(); ++im) {
350  tr.mask(im).check_assertions();
351  tr.mask(im).unpack_strides(tr.strides()[im], trstrides_unpacked[im]);
352  }
353 
354 
355  /* on parcours chaque masque (merge) */
356  for (dim_type im = 0; im < masks().size(); ++im) {
357  const tensor_mask& m = masks()[im];
358  /* par construction, tous les masques de tr sont inclus (ou egaux)
359  dans un masque de l'objet courant
360 
361  on construit donc la liste des masques de tr qui sont inclus dans m
362  */
363  std::vector<dim_type> trmasks; trmasks.reserve(tr.masks().size());
364  for (dim_type i=0; i < m.indexes().size(); ++i) {
365  if (tr.index_is_valid(m.indexes()[i])) { /* l'index n'est pas forcement valide pour tr !*/
366  dim_type im2 = tr.index_to_mask_num(m.indexes()[i]);
367  if (std::find(trmasks.begin(), trmasks.end(), im2)==trmasks.end())
368  trmasks.push_back(im2);
369  }
370  }
371 
372  tensor_ranges gcnt(tr.ndim(),0);
373  stride_type stcnt = 0;
374 
375  for (tensor_ranges_loop l(m.ranges()); !l.finished(); l.next()) {
376  /* recopie les indice de la boucle dans les indices globaux */
377  for (dim_type i=0; i < m.ranges().size(); ++i)
378  gcnt[m.indexes()[i]] = l.index(i);
379 
380  bool in_m = m(gcnt);
381  bool in_trm = true;
382  stride_type tr_s = 0;
383 
384  /* verifie si l'element est bien marque non nul dans les masques de tr
385  et met a jour le stride */
386  for (dim_type i=0; i < trmasks.size(); ++i) {
387  const tensor_mask &mm = tr.mask(trmasks[i]);
388 
389  //cout << " mm=" << mm << endl << "gcnt=" << gcnt << endl;
390  if (mm(gcnt)) {
391  tr_s += trstrides_unpacked[trmasks[i]][mm.pos(gcnt)];
392  assert(trstrides_unpacked[trmasks[i]][mm.pos(gcnt)]>=0); // --- DEBUG ---
393  } else { in_trm = false; break; }
394  }
395  /* verifie que m est un sous-ensemble des masques de trmasks */
396  if (in_m) assert(in_trm);
397  if (!in_trm) assert(!in_m);
398  /* recopie le stride si l'element est non nul dans m */
399  if (in_m) {
400  // cout << "ajout du " << stcnt << "eme elt @ stride=" << tr_s << endl;
401  strides_[im][stcnt++] = tr_s;
402  }
403  }
404 
405  /* verif que yapa bug */
406  assert(stcnt == stride_type(m.card()));
407  }
408  ensure_0_stride(); /* c'est plus propre comme ca */
409  }
410 
411  /* slices a tensor_ref, at dimension 'dim', position 'islice'
412  ... not straightforward for sparse tensors !
413  */
414  tensor_ref::tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice) {
415  set_sub_tensor(tr, tr.slice_shape(slice));
416 
417  /* shift the base according to the old stride */
418  ensure_0_stride();
419 
420  /* create a mask m2 with one less dimension than m1 */
421  const tensor_mask& m1(index_to_mask(slice.dim));
422  dim_type mdim = index_to_mask_dim(slice.dim);
423  if (m1.ndim() > 1) {
424  tensor_ranges r(m1.ranges()); r.erase(r.begin()+mdim);
425  index_set idx(m1.indexes()); idx.erase(idx.begin()+mdim);
426  tensor_mask m2(r,idx);
427  index_type pos1 = 0, pos2 = 0;
428  for (tensor_ranges_loop l(m1.ranges()); !l.finished(); l.next()) {
429  if (l.index(mdim) == slice.i0)
430  m2.set_mask_val(pos2++, m1(pos1));
431  else
432  assert(m1(pos1) == 0);
433  pos1++;
434  }
435 
436 
437  /* replace the old mask by the new one */
438  assert(index_to_mask_num(slice.dim) < masks().size());
439  masks()[index_to_mask_num(slice.dim)] = m2;
440  } else {
441  /* simply remove the mask since it only contained the dimension 'dim' */
442  remove_mask(index_to_mask_num(slice.dim));
443  }
444  /* shift all indexes greater than dim */
445  shift_dim_num_ge(slice.dim,-1);
446  set_ndim_noclean(dim_type(ndim()-1));
447  update_idx2mask();
448  }
449 
450 
451  struct compare_packed_range {
452  const std::vector<packed_range_info>& pri;
453  std::vector<stride_type> mean_inc;
454  compare_packed_range(const std::vector<packed_range_info>& pri_) : pri(pri_) {}
455  bool operator()(dim_type a, dim_type b) {
456  if (pri[a].n < pri[b].n) return true;
457  else if (pri[a].n > pri[b].n) return false;
458  else { /* c'est la que ca devient interessant */
459  if (pri[a].mean_increm > pri[b].mean_increm)
460  return true;
461  }
462  return false;
463  }
464  };
465 
466  void multi_tensor_iterator::init(std::vector<tensor_ref> trtab, bool with_index_values) {
467  N = index_type(trtab.size());
468  index_type N_packed_idx = 0;
469 
470  /* non null elements across all tensors */
471 
472  tensor_shape ts(trtab[0]);
473  for (dim_type i=1; i < trtab.size(); ++i)
474  ts.merge(trtab[i]);
475 
476 
477  /* apply the mask to all tensors,
478  updating strides according to it */
479  for (dim_type i = 0; i < N; ++i) {
480  tensor_ref tmp = trtab[i];
481  trtab[i].set_sub_tensor(tmp,ts);
482  }
483 
484  /* find significant indexes (ie remove indexes who only address 1 element) */
485  dal::bit_vector packed_idx; packed_idx.sup(0,ts.ndim());
486  for (index_type mi=0; mi < ts.masks().size(); ++mi) {
487  if (ts.masks()[mi].card() != 1) {
488  packed_idx.add(mi);
489  N_packed_idx++;
490  } else {
491  /* sanity check (TODO: s'assurer que sub_tensor_ref appelle bien ensure_0_stride) */
492  for (dim_type j=0; j < N; ++j) {
493  if (trtab[j].strides()[mi].size() != 0) {
494  assert(trtab[j].strides()[mi].size() == 1);
495  assert(trtab[j].strides()[mi][0] == 0);
496  }
497  }
498  }
499  }
500 
501  pr.resize(N_packed_idx);
502  pri.resize(N_packed_idx);
503 
504  /* evaluation of "ranks" per indice */
505 
506  index_type pmi = 0;
507  for (dim_type mi = 0; mi < ts.masks().size(); ++mi) {
508  if (packed_idx[mi]) {
509  dim_type n;
510  pri[pmi].original_masknum = mi;
511  pri[pmi].range = ts.masks()[mi].card();
512  for (n = 0; n < N; n++)
513  if (trtab[n].index_is_valid(mi))
514  break;
515  pri[pmi].n = n;
516  pr[pmi].n = n;
517  pmi++;
518  }
519  }
520 
521  /* sort the packed_range_info according to the "ranks" */
522  std::sort(pri.begin(), pri.end());
523 
524 
525  /* eval bloc ranks */
526  bloc_rank.resize(N+1); bloc_rank[0] = 0;
527  bloc_nelt.resize(N+1); bloc_nelt[0] = 0;
528  for (index_type i=1; i <= N; ++i) {
529  bloc_rank[i] = bloc_rank[i-1];
530  while (bloc_rank[i] < pri.size() && pri[bloc_rank[i]].n == i-1) bloc_rank[i]++;
531  bloc_nelt[i] = bloc_rank[i] - bloc_rank[i-1];
532  }
533 
534  /* "package" the strides in structure pri */
535  for (pmi = 0; pmi < pri.size(); ++pmi) {
536  index_type mi = pri[pmi].original_masknum;
537  pri[pmi].mean_increm = 0;
538  pri[pmi].inc.assign(pri[pmi].range*(N-pri[pmi].n), 0);
539  pri[pmi].have_regular_strides.reset();
540  pri[pmi].have_regular_strides = std::bitset<32>((1 << N)-1);
541  for (dim_type n=pri[pmi].n; n < N; ++n) {
542  index_type pos0 = (n - pri[pmi].n);
543  for (index_type i = 0; i < pri[pmi].range; ++i) {
544  index_type pos = i * (N-pri[pmi].n) + pos0;
545  if (i != pri[pmi].range-1) {
546  stride_type increm = trtab[n].strides()[mi][i+1] - trtab[n].strides()[mi][i];
547  pri[pmi].inc[pos] = increm;
548  if (pri[pmi].inc[pos] != pri[pmi].inc[pos0])
549  pri[pmi].have_regular_strides[n] = false;
550  pri[pmi].mean_increm += increm;
551  } else
552  pri[pmi].inc[pos] = -trtab[n].strides()[mi][i];
553  }
554  }
555  if (pri[pmi].n!=N)
556  pri[pmi].mean_increm /= (N-pri[pmi].n)*(pri[pmi].range-1);
557  }
558 
559  /* optimize them w/r to mean strides (without modifying the "ranks") */
560  index_set pr_reorder(pri.size());
561  for (pmi = 0; pmi < pri.size(); ++pmi) {
562  pr_reorder[pmi]=dim_type(pmi);
563  }
564  std::sort(pr_reorder.begin(), pr_reorder.end(), compare_packed_range(pri));
565  {
566  std::vector<packed_range> tmppr(pr);
567  std::vector<packed_range_info> tmppri(pri);
568  for (dim_type i =0; i < pri.size(); ++i) {
569  pr[i] = tmppr [pr_reorder[i]];
570  pri[i] = tmppri[pr_reorder[i]];
571  }
572  }
573 
574  /* setup data necessary to get back (quite quickly) the index values while iterating */
575  if (with_index_values) {
576  idxval.resize(ts.ndim());
577 
578  for (dim_type i=0; i < ts.ndim(); ++i) {
579  idxval[i].ppinc = NULL;
580  idxval[i].pposbase = NULL;
581  idxval[i].pos_ = 0;
582  }
583 
584  for (index_type mi = 0; mi < ts.masks().size(); ++mi) {
585  tensor_strides v;
586  pmi = index_type(-1);
587  for (dim_type i=0; i < pr.size(); ++i)
588  if (pri[i].original_masknum == mi)
589  pmi = i;
590 
591  if (pmi != index_type(-1)) {
592  ts.masks()[mi].gen_mask_pos(pri[pmi].mask_pos);
593  } else { /* not very nice .. */
594  ts.masks()[mi].gen_mask_pos(v);
595  assert(v.size()==1);
596  }
597  for (dim_type i=0; i < ts.masks()[mi].indexes().size(); ++i) {
598  dim_type ii = ts.masks()[mi].indexes()[i];
599  idxval[ii].cnt_num = dim_type(pmi); //packed_idx[mi] ? pmi : dim_type(-1);
600  idxval[ii].pos_ = (pmi != index_type(-1)) ? 0 : v[0];
601  idxval[ii].mod = ts.masks()[mi].strides()[i+1];
602  idxval[ii].div = ts.masks()[mi].strides()[i];
603  }
604  }
605  }
606 
607  /* check for opportunities to vectorize the loops with the mti
608  (assuming regular strides etc.)
609  */
610  vectorized_strides_.resize(N); vectorized_size_ = 0;
611  std::fill(vectorized_strides_.begin(), vectorized_strides_.end(), 0);
612  vectorized_pr_dim = index_type(pri.size());
613  for (vectorized_pr_dim = index_type(pri.size()-1); vectorized_pr_dim != index_type(-1); vectorized_pr_dim--) {
614  std::vector<packed_range_info>::const_iterator pp = pri.begin() + vectorized_pr_dim;
615  if (vectorized_pr_dim == pri.size()-1) {
616  if (pp->have_regular_strides.count() == N) vectorized_size_ = pp->range;
617  for (dim_type n=pp->n; n < N; ++n) {
618  GMM_ASSERT3(n < pp->inc.size(), ""); // expected failure here with "empty" tensors, see tests/test_assembly.cc, testbug() function.
619  vectorized_strides_[n] = pp->inc[n];
620  }
621  } else {
622  if (pp->have_regular_strides.count() != N) break;
623  bool still_ok = true;
624  for (dim_type n=pp->n; n < N; ++n) {
625  if (stride_type(vectorized_strides_[n]*vectorized_size_) != pp->inc[n]) still_ok = false;
626  }
627  if (still_ok) {
628  vectorized_size_ *= pp->range;
629  } else break;
630  }
631  }
632 
633  it.resize(N); pit0.resize(N); itbase.resize(N);
634  for (dim_type n=0; n < N; ++n) {
635  pit0[n]=trtab[n].pbase();
636  itbase[n]=trtab[n].base_shift();
637  }
638  rewind();
639  }
640 
641  void multi_tensor_iterator::print() const {
642  cout << "MTI(N=" << N << "): ";
643  for (dim_type i=0; i < pr.size(); ++i)
644  cout << " pri[" << int(i) << "]: n=" << int(pri[i].n)
645  << ", range=" << pri[i].range << ", mean_increm="
646  << pri[i].mean_increm << ", regular = " << pri[i].have_regular_strides
647  << ", inc=" << vref(pri[i].inc) << "\n";
648  cout << "bloc_rank: " << vref(bloc_rank) << ", bloc_nelt: " << vref(bloc_nelt) << "\n";
649  cout << "vectorized_size : " << vectorized_size_ << ", strides = " << vref(vectorized_strides_) << ", pr_dim=" << vectorized_pr_dim << "\n";
650  }
651 
652  void tensor_reduction::insert(const tensor_ref& tr_, const std::string& s) {
653  tensor_shape ts(tr_);
654  diag_shape(ts,s);
655  trtab.push_back(tref_or_reduction(tensor_ref(tr_, ts), s));
656  //cout << "reduction.insert('" << s << "')\n";
657  //cout << "Reduction: INSERT tr(ndim=" << int(tr_.ndim()) << ", s='" << s << "')\n";
658  //cout << "Content: " << tr_ << endl;
659  //cout << "shape: " << (tensor_shape&)tr_ << endl;
660  }
661 
662  void tensor_reduction::insert(const tref_or_reduction& t, const std::string& s) {
663  if (!t.is_reduction()) {
664  insert(t.tr(),s);
665  } else {
666  trtab.push_back(t); trtab.back().ridx = s;
667  //cout << "reduction.insert_reduction('" << s << "')\n";
668  //cout << "Reduction: INSERT REDUCTION tr(ndim=" << int(t.tr().ndim()) << ", s='" << s << "')\n";
669  }
670  }
671 
672  /* ensure that r(i,i).t(j,:,j,j,k,o)
673  becomes r(i,A).t(j,:,B,C,k,o)
674  and updates reduction_chars accordingly
675  */
676  void tensor_reduction::update_reduction_chars() {
677  reduction_chars.clear();
678  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
679  assert(it->ridx.size() == it->tr().ndim());
680  for (unsigned i =0; i < it->ridx.size(); ++i) {
681  if (it->ridx[i] != ' ' &&
682  reduction_chars.find(it->ridx[i]) == std::string::npos)
683  reduction_chars.push_back(it->ridx[i]);
684  }
685  }
686  /* for each tensor, if a diagonal reduction inside the tensor is used,
687  the mask of the tensor has been 'and'-ed with a diagonal mask
688  and a second 'virtual' reduction index is used */
689  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
690  it->gdim.resize(it->ridx.size());
691  for (unsigned i =0; i < it->ridx.size(); ++i) {
692  char c = it->ridx[i];
693  if (c != ' ' && it->ridx.find(c) != i) {
694  for (c = 'A'; c <= 'Z'; ++c)
695  if (reduction_chars.find(c) == std::string::npos) break;
696  it->ridx[i] = c;
697  reduction_chars.push_back(it->ridx[i]);
698  }
699  }
700  }
701  }
702 
703  /*
704  initialize 'reduced_range' and it->rdim
705  */
706  void tensor_reduction::pre_prepare() {
707  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
708  assert(it->ridx.size() == it->tr().ndim());
709  it->rdim.resize(it->ridx.size());
710  //cout << " rix = '" << it->ridx << "'\n";
711  for (dim_type i =0; i < it->ridx.size(); ++i) {
712  if (it->ridx[i] == ' ') {
713  reduced_range.push_back(it->tr().dim(i));
714  it->rdim[i] = dim_type(reduced_range.size()-1);
715  } else
716  it->rdim[i] = dim_type(-1);
717  }
718  }
719  }
720 
721  /* look for a possible sub-reduction on a subset of the tensors.
722  returns the subset, and the list of concerned reduction indexes */
723  size_type tensor_reduction::find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset) {
724  dal::bit_vector lst;
725  std::string idxset;
726  best_lst.clear(); best_idxset.clear();
727 
728  update_reduction_chars();
729 
730  //cout << "find_best_reduction: reduction_chars='" << reduction_chars << "'\n";
731  GMM_ASSERT2(trtab.size() <= 32, "wow it was assumed that nobody would "
732  "ever need a reduction on more than 32 tensors..");
733 
734  std::vector<std::bitset<32> > idx_occurences(reduction_chars.size());
735 
736  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
737  char c = reduction_chars[ir];
738  for (unsigned tnum=0; tnum < trtab.size(); ++tnum)
739  idx_occurences[ir][tnum] = (trtab[tnum].ridx.find(c) != std::string::npos);
740  //cout << "find_best_reduction: idx_occurences[" << ir << "] = " << idx_occurences[ir] << "\n";
741  }
742  size_type best_redsz = 100000000;
743  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
744  lst.clear(); lst.add(ir);
745  idxset.resize(0); idxset.push_back(reduction_chars[ir]);
746  /* add other possible reductions */
747  for (unsigned ir2=0; ir2 < reduction_chars.size(); ++ir2) {
748  if (ir2 != ir) {
749  if ((idx_occurences[ir2] | idx_occurences[ir]) == idx_occurences[ir]) {
750  lst.add(ir2);
751  idxset.push_back(reduction_chars[ir2]);
752  }
753  }
754  }
755  /* evaluate the cost */
756  size_type redsz = 1;
757  for (unsigned tnum=0; tnum < trtab.size(); ++tnum) {
758  if (!idx_occurences[ir][tnum])
759  continue;
760  std::bitset<int(32)> once((int)reduction_chars.size());
761  for (dim_type i=0; i < trtab[tnum].tr().ndim(); ++i) {
762  bool ignore = false;
763  for (dal::bv_visitor j(lst); !j.finished(); ++j) {
764  if (trtab[tnum].ridx[i] == reduction_chars[(size_t)j]) {
765  if (once[j]) ignore = true;
766  else once[j] = true;
767  }
768  }
769  if (!ignore)
770  redsz *= trtab[tnum].tr().dim(i);
771  }
772  }
773  //cout << " test " << reduction_chars[ir] << ": lst=" << lst << ", redsz=" << redsz << "\n";
774  if (redsz < best_redsz) {
775  best_redsz = redsz;
776  best_lst.clear();
777  for (unsigned i=0; i < trtab.size(); ++i)
778  if (idx_occurences[ir][i]) best_lst.add(i);
779  best_idxset = idxset;
780  }
781  }
782  /*cout << "find_best_reduction: lst = " << best_lst << " [nt="
783  << trtab.size() << "], idx_set='" << best_idxset
784  << "', redsz=" << best_redsz << "\n";
785  */
786  return best_redsz;
787  }
788 
789  void tensor_reduction::make_sub_reductions() {
790  dal::bit_vector bv; std::string red;
791  int iter = 1;
792  do {
793  find_best_sub_reduction(bv,red);
794  if (bv.card() < trtab.size() && red.size()) {
795  //cout << "making sub reduction\n";
796  auto sub = std::make_shared<tensor_reduction>();
797  std::vector<dim_type> new_rdim; new_rdim.reserve(16);
798  std::string new_reduction;
799  for (dal::bv_visitor tnum(bv); !tnum.finished(); ++tnum) {
800  tref_or_reduction &t = trtab[tnum];
801  std::string re = t.ridx; t.ridx.clear();
802  for (unsigned i = 0; i < re.size(); ++i) {
803  bool reduced = false;
804  char c = re[i];
805  if (c != ' ') {
806  if (red.find(re[i]) == std::string::npos) c = ' ';
807  else reduced = true;
808  }
809  if (!reduced) {
810  t.ridx.push_back(re[i]);
811  new_rdim.push_back(t.rdim[i]);
812  new_reduction.push_back(re[i]);
813  }
814  re[i] = c;
815  }
816  //cout << " sub-";
817  sub->insert(trtab[tnum], re);
818  }
819  //cout << " new_reduction = '" << new_reduction << "'\n";
820  sub->prepare();
821  /*cout << " " << new_reduction.size() << " == " << int(sub->trres.ndim()) << "?\n";
822  assert(new_reduction.size() == sub->trres.ndim());*/
823  trtab[bv.first_true()] = tref_or_reduction(sub, new_reduction);
824  trtab[bv.first_true()].rdim = new_rdim;
825  std::vector<tref_or_reduction> trtab2; trtab2.reserve(trtab.size() - bv.card() + 1);
826  for (size_type i=0; i < trtab.size(); ++i)
827  if (!bv.is_in(i) || i == bv.first_true())
828  trtab2.push_back(trtab[i]);
829  trtab.swap(trtab2);
830  //cout << "make_sub_reductions[" << iter << "] : still " << trtab.size() << " tensors\n";
831  /*for (size_type i=0; i < trtab.size(); ++i)
832  cout << " dim = " << trtab[i].tr().ndim() << " : '" << trtab[i].ridx << "'\n";
833  */
834  ++iter;
835  } else {
836  //cout << "Ok, no more reductions (bv.card() == " << bv.card() << ")\n\n";
837  break;
838  }
839  } while (1);
840  }
841 
842  void tensor_reduction::prepare(const tensor_ref* tr_out) {
843  pre_prepare();
844  make_sub_reductions();
845 
846  /* create the output tensor */
847  if (tr_out == NULL) {
848  trres = tensor_ref(reduced_range);
849  out_data.resize(trres.card());
850  pout_data = &out_data[0];
851  trres.set_base(pout_data);
852  } else {
853  GMM_ASSERT3(tr_out->ndim() == reduced_range.size(), "");
854  for (dim_type i=0; i < tr_out->ndim(); ++i)
855  GMM_ASSERT3(tr_out->dim(i) == reduced_range[i], "");
856  trres = *tr_out;
857  }
858 
859  /* prepare the mapping from each dimension of each tensor to the global range
860  (the first dimensions are reserved for non-reduced dimensions, i.e. those
861  of 'reduced_range'
862  */
863  tensor_ranges global_range; /* global range across all tensors of the
864  reduction */
865  std::string global_chars; /* name of indexes (or ' ') for each dimension
866  of global_range */
867  global_range.reserve(16);
868  global_range.assign(reduced_range.begin(), reduced_range.end());
869  global_chars.insert(size_type(0), reduced_range.size(), ' ');
870  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
871  assert(it->rdim.size() == it->tr().ndim());
872  it->gdim = it->rdim;
873  for (dim_type i=0; i < it->ridx.size(); ++i) {
874  if (it->rdim[i] == dim_type(-1)) {
875  assert(it->ridx[i] != ' ');
876  std::string::size_type p = global_chars.find(it->ridx[i]);
877  if (p == std::string::npos) {
878  global_chars.push_back(it->ridx[i]);
879  global_range.push_back(it->tr().dim(i));
880  it->gdim[i] = dim_type(global_range.size() - 1);
881  } else {
882  GMM_ASSERT1(it->tr().dim(i) == global_range[p],
883  "inconsistent dimensions for reduction index "
884  << it->ridx[i] << "(" << int(it->tr().dim(i))
885  << " != " << int(global_range[p]) << ")");
886  it->gdim[i] = dim_type(p);
887  }
888  }
889  }
890  //cout << " rdim = " << it->rdim << ", gdim = " << it->gdim << "\n";
891  }
892  //cout << "global_range = " << global_range << ", global_chars = '" << global_chars << "'\n";
893 
894  std::vector<dim_type> reorder(global_chars.size(), dim_type(-1));
895  /* increase the dimension of the tensor holding the result */
896  for (dim_type i=0; i < reduced_range.size(); ++i) reorder[i] = i;
897  //cout << "reorder = '" << reorder << "'\n";
898  trres.permute(reorder);
899  std::vector<tensor_ref> tt; tt.reserve(trtab.size()+1);
900  tt.push_back(trres);
901 
902  /* permute all tensors (and increase their number of dimensions) */
903  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
904  std::fill(reorder.begin(), reorder.end(), dim_type(-1));
905  for (dim_type i=0; i < it->gdim.size(); ++i) {
906  reorder[it->gdim[i]] = i;
907  }
908  //cout << "reorder = '" << reorder << "'\n";
909  it->tr().permute(reorder);
910  tt.push_back(it->tr());
911  //cout << "MTI[" << it-trtab.begin() << "/" << trtab.size() << "] : " << (tensor_shape&)it->tr();
912  }
913 
914  /* now, the iterator can be built */
915  mti.init(tt,false);
916  }
917 
918  static void do_reduction1(bgeot::multi_tensor_iterator &mti) {
919  do {
920  scalar_type s1 = 0;
921  do {
922  s1 += mti.p(1);
923  } while (mti.bnext(1));
924  mti.p(0) += s1;
925  } while (mti.bnext(0));
926  }
927 
928  static bool do_reduction2v(bgeot::multi_tensor_iterator &mti) {
929  size_type n = mti.vectorized_size();
930  const std::vector<stride_type> &s = mti.vectorized_strides();
931  if (n && s[0] && s[1] && s[2] == 0) {
932 #if defined(GMM_USES_BLAS)
933  BLAS_INT nn = BLAS_INT(n), incx = s[1], incy = s[0];
934 #else
935  dim_type incx = dim_type(s[1]), incy = dim_type(s[0]);
936 #endif
937  /*mti.print();
938  scalar_type *b[3];
939  for (int i=0; i < 3; ++i) b[i] = &mti.p(i);*/
940  do {
941  /*cout << "vectorized_ reduction2a : n=" << n << ", s = " << s << " mti.p=" << &mti.p(0)-b[0] << ","
942  << &mti.p(1)-b[1] << "," << &mti.p(2)-b[2] << "\n";*/
943 #if defined(GMM_USES_BLAS)
944  gmm::daxpy_(&nn, &mti.p(2), &mti.p(1), &incx, &mti.p(0), &incy);
945 #else
946  double a = mti.p(2);
947  scalar_type* itx = &mti.p(1);
948  scalar_type* ity = &mti.p(0);
949  *ity += a * (*itx);
950  for (size_type i=1; i < n; ++i) {
951  itx += incx;
952  ity += incy;
953  *ity += a * (*itx);
954  }
955 #endif
956  } while (mti.vnext());
957  return true;
958  } else
959  return false;
960  }
961  static void do_reduction2a(bgeot::multi_tensor_iterator &mti) {
962  if (!do_reduction2v(mti)) {
963  do {
964  mti.p(0) += mti.p(1)*mti.p(2);
965  } while (mti.bnext(0));
966  }
967  }
968 
969  static void do_reduction2b(bgeot::multi_tensor_iterator &mti) {
970  do {
971  scalar_type s1 = 0;
972  do {
973  scalar_type s2 = 0;
974  do {
975  s2 += mti.p(2);
976  } while (mti.bnext(2));
977  s1 += mti.p(1)*s2;
978  } while (mti.bnext(1));
979  mti.p(0) += s1;
980  } while (mti.bnext(0));
981  }
982 
983  static bool do_reduction3v(bgeot::multi_tensor_iterator &mti) {
984  size_type n = mti.vectorized_size();
985  const std::vector<stride_type> &s = mti.vectorized_strides();
986  if (n && s[0] && s[1] && s[2] == 0 && s[3] == 0) {
987 #if defined(GMM_USES_BLAS)
988  BLAS_INT nn = BLAS_INT(n), incx = s[1], incy = s[0];
989 #else
990  dim_type incx = dim_type(s[1]), incy = dim_type(s[0]);
991 #endif
992  do {
993  double a = mti.p(2)*mti.p(3);
994 #if defined(GMM_USES_BLAS)
995  gmm::daxpy_(&nn, &a, &mti.p(1), &incx, &mti.p(0), &incy);
996 #else
997  scalar_type* itx = &mti.p(1);
998  scalar_type* ity = &mti.p(0);
999  *ity += a * (*itx);
1000  for (size_type i=1; i < n; ++i) {
1001  itx += incx;
1002  ity += incy;
1003  *ity += a * (*itx);
1004  }
1005 #endif
1006  } while (mti.vnext());
1007  return true;
1008  } else
1009  return false;
1010  }
1011 
1012  static void do_reduction3a(bgeot::multi_tensor_iterator &mti) {
1013  if (!do_reduction3v(mti)) {
1014  do {
1015  mti.p(0) += mti.p(1)*mti.p(2)*mti.p(3);
1016  } while (mti.bnext(0));
1017  }
1018  }
1019 
1020  static void do_reduction3b(bgeot::multi_tensor_iterator &mti) {
1021  do {
1022  scalar_type s1 = 0;
1023  do {
1024  scalar_type s2 = 0;
1025  do {
1026  scalar_type s3 = 0;
1027  do {
1028  s3 += mti.p(3);
1029  } while (mti.bnext(3));
1030  s2 += mti.p(2)*s3;
1031  } while (mti.bnext(2));
1032  s1 += mti.p(1)*s2;
1033  } while (mti.bnext(1));
1034  mti.p(0) += s1;
1035  } while (mti.bnext(0));
1036  }
1037 
1038  void tensor_reduction::do_reduction() {
1039  /* on s'assure que le tenseur destination a bien ete remis a zero
1040  avant le calcul (c'est obligatoire malheureusement, consequence
1041  de l'utilisation de masque qui ne s'arretent pas forcement sur les
1042  'frontieres' entre les differents tenseurs reduits) */
1043  //std::fill(out_data.begin(), out_data.end(), 0.);
1044  if (out_data.size()) memset(&out_data[0], 0, out_data.size()*sizeof(out_data[0]));
1045  for (unsigned i=0; i < trtab.size(); ++i) {
1046  if (trtab[i].is_reduction()) {
1047  trtab[i].reduction->do_reduction();
1048  trtab[i].reduction->result(trtab[i].tr());
1049  //cout << "resultat intermediaire: " << trtab[i].tr() << endl;
1050  }
1051  }
1052  mti.rewind();
1053  dim_type N = dim_type(trtab.size());
1054  if (N == 1) {
1055  do_reduction1(mti);
1056  } else if (N == 2) {
1057  if (!mti.bnext_useful(2) && !mti.bnext_useful(1)) {
1058  do_reduction2a(mti);
1059  } else {
1060  do_reduction2b(mti);
1061  }
1062  } else if (N == 3) {
1063  if (!mti.bnext_useful(1) && (!mti.bnext_useful(2)) && !mti.bnext_useful(3)) {
1064  do_reduction3a(mti);
1065  } else {
1066  do_reduction3b(mti);
1067  }
1068  } else if (N == 4) {
1069  do {
1070  scalar_type s1 = 0;
1071  do {
1072  scalar_type s2 = 0;
1073  do {
1074  scalar_type s3 = 0;
1075  do {
1076  scalar_type s4 = 0;
1077  do {
1078  s4 += mti.p(4);
1079  } while (mti.bnext(4));
1080  s3 += mti.p(3)*s4;
1081  } while (mti.bnext(3));
1082  s2 += mti.p(2)*s3;
1083  } while (mti.bnext(2));
1084  s1 += mti.p(1)*s2;
1085  } while (mti.bnext(1));
1086  mti.p(0) += s1;
1087  } while (mti.bnext(0));
1088  } else {
1089  GMM_ASSERT1(false, "unhandled reduction case ! (N=" << int(N) << ")");
1090  }
1091  }
1092 
1093  void tensor_reduction::clear() {
1094  trtab.clear();
1095  trres.clear();
1096  reduced_range.resize(0);
1097  reduction_chars.clear();
1098 
1099  out_data.resize(0);
1100  pout_data = 0; trtab.reserve(10);
1101  mti.clear();
1102  }
1103 
1104 
1105  void tensor_mask::print(std::ostream &o) const {
1106  index_type c=card(true);
1107  check_assertions();
1108  o << " mask : card=" << c << "(card_=" << card_ << ", uptodate=" << card_uptodate << "), indexes=";
1109  for (dim_type i=0; i < idxs.size(); ++i)
1110  o << (i==0?"":", ") << int(idxs[i]) << ":" << int(r[i]);
1111  o << " ";
1112  if (c == size()) o << " FULL" << endl;
1113  else {
1114  o << "m={";
1115  if (idxs.size() == 1) {
1116  for (index_type i=0; i < m.size(); ++i)
1117  o << (m[i] ? 1 : 0);
1118  } else {
1119  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
1120  if (l.cnt[0] == 0 && l.cnt[1] == 0 && r.size()>2) {
1121  o << "\n -> (:,:";
1122  for (dim_type i=2; i < r.size(); ++i)
1123  o << "," << l.cnt[i];
1124  o << ")={";
1125  }
1126  o << (m[lpos(l.cnt)] ? 1 : 0);
1127  if (l.cnt[0] == r[0]-1) {
1128  if (l.cnt[1] != r[1]-1) o << ",";
1129  else if (idxs.size() > 2) o << "}";
1130  }
1131  }
1132  }
1133  o << "}" << endl;
1134  }
1135  }
1136 
1137 
1138 
1139  void tensor_shape::print(std::ostream& o) const {
1140  o << " tensor_shape: n=" << idx2mask.size() << ", idx2mask=";
1141  for (dim_type i=0; i < idx2mask.size(); ++i) {
1142  if (i) o << ",";
1143  if (idx2mask[i].is_valid()) {
1144  o << "r" << dim(i) << ":m" << int(idx2mask[i].mask_num) << "/" << int(idx2mask[i].mask_dim);
1145  } else o << " (na) ";
1146  }
1147  o << endl;
1148  // o << " masks[1.."<< masks_.size() << "]={" << endl;
1149  for (dim_type i=0; i < masks_.size(); ++i) o << masks_[i];
1150  o << " ^-- end tensor_shape" << endl;
1151  }
1152 
1153  void tensor_ref::print(std::ostream& o) const {
1154  o << "tensor_ref, n=" << int(ndim()) << ", card=" << card(true) << ", base=" << base() << endl;
1155  for (dim_type i=0; i < strides().size(); ++i) {
1156  o << " * strides["<<i<<"]={";
1157  for (size_type j=0; j < strides()[i].size(); ++j) o << (j>0?",":"") << strides()[i][j];
1158  o << "}" << endl;
1159  }
1160  multi_tensor_iterator mti(*this, true);
1161  do {
1162  for (dim_type i = 0; i < mti.ndim(); ++i) {
1163  o << (i==0?"(":",");
1164  if (index_is_valid(i))
1165  o << mti.index(i);
1166  else o << "*";
1167  }
1168  o << ")";
1169  if (base()) {
1170  o << " = " << mti.p(0) << "\t@base+" << &mti.p(0) - base();
1171  } else o << "\t@" << size_t(&mti.p(0))/sizeof(scalar_type);
1172  o << endl;
1173  } while (mti.qnext1());
1174 
1175  o << "^---- end tensor_ref" << endl;
1176  }
1177 
1178  std::ostream& operator<<(std::ostream& o, const tensor_mask& m) {
1179  m.print(o); return o;
1180  }
1181  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts) {
1182  ts.print(o); return o;
1183  }
1184  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr) {
1185  tr.print(o); return o;
1186  }
1187  void print_tm(const tensor_mask& tm) { tm.print_(); }
1188  void print_ts(const tensor_shape& ts) { ts.print_(); }
1189  void print_tr(const tensor_ref& tr) { tr.print_(); }
1190 }
Sparse tensors, used during the assembly.
void clear(L &l)
clear (fill with zeros) a vector or matrix.
Definition: gmm_blas.h:58
void resize(V &v, size_type n)
*‍/
Definition: gmm_blas.h:209
gmm interface for fortran BLAS.
Basic Geometric Tools.
std::ostream & operator<<(std::ostream &o, const convex_structure &cv)
Print the details of the convex structure cvs to the output stream o.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48