51 S, row_ind.size(), &*row_ind.begin(), col_ind.size(), &*col_ind.begin(),
52 &*
m.data().begin(), iora
73 template <
typename OP_SCHUR_ASSEMBLE_BASE>
91 std::vector<std::string> fields_name,
92 std::vector<boost::shared_ptr<Range>> field_ents,
95 std::vector<bool> sym_schur,
bool symm_op =
true,
96 boost::shared_ptr<BlockStructure> diag_blocks =
nullptr);
112 std::vector<std::string> fields_name,
113 std::vector<boost::shared_ptr<Range>> field_ents,
116 std::vector<bool> sym_schur, std::vector<double> diag_eps,
118 boost::shared_ptr<BlockStructure> diag_blocks =
nullptr);
121 template <
typename I>
173 struct SchurElemMats :
public boost::enable_shared_from_this<SchurElemMats> {
198 template <
typename OP_SCHUR_ASSEMBLE_BASE>
213 member<SchurElemMats, const UId, &SchurElemMats::uidRow>,
214 member<SchurElemMats, const UId, &SchurElemMats::uidCol>
223 static boost::ptr_vector<MatrixDouble>
locMats;
286 const_mem_fun<Indexes, int, &Indexes::getLocRow>,
287 const_mem_fun<Indexes, int, &Indexes::getLocCol>,
288 const_mem_fun<Indexes, int, &Indexes::getNbRows>,
289 const_mem_fun<Indexes, int, &Indexes::getNbCols>>
297 const_mem_fun<Indexes, int, &Indexes::getRow>,
298 const_mem_fun<Indexes, int, &Indexes::getCol>,
299 const_mem_fun<Indexes, int, &Indexes::getNbRows>,
300 const_mem_fun<Indexes, int, &Indexes::getNbCols>>
306 const_mem_fun<Indexes, int, &Indexes::rowShift>
312 const_mem_fun<Indexes, int, &Indexes::colShift>
373 : iDX(idx), uidRow(uid_row), uidCol(uid_col) {}
387 auto get_row_indices = [&]() ->
const VectorInt & {
389 if (
auto stored_data_ptr =
391 return stored_data_ptr->entityIndices;
397 const auto &row_ind = get_row_indices();
400 const auto nb_rows = row_ind.size();
401 const auto nb_cols = col_ind.size();
404 if (mat.size1() != nb_rows) {
406 "Wrong mat size %d != %d", mat.size1(), nb_rows);
408 if (mat.size2() != nb_cols) {
410 "Wrong mat size %d != %d", mat.size2(), nb_cols);
415 auto get_uid = [](
auto &data) {
416 if (data.getFieldEntities().size() == 1) {
418 return data.getFieldEntities()[0]->getLocalUniqueId();
427 auto &uid0 = data.getFieldEntities()[0]->getLocalUniqueId();
433 for (
auto i = 1;
i < data.getFieldEntities().size(); ++
i) {
440 data.getFieldEntities()[
i]->getLocalUniqueId())
454 auto uid_row = get_uid(row_data);
455 auto uid_col = get_uid(col_data);
459 .find(boost::make_tuple(uid_row, uid_col));
491 auto asmb = [&](
auto &sm) {
492 sm.resize(nb_rows, nb_cols,
false);
496 asmb((*p.first)->getMat());
498 auto add_indices = [](
auto &storage,
auto &
ind) {
499 storage.resize(
ind.size(),
false);
500 noalias(storage) =
ind;
503 add_indices((*p.first)->getRowInd(), row_ind);
504 add_indices((*p.first)->getColInd(), col_ind);
509 auto asmb = [&](
auto &sm) {
513 if (sm.size1() != nb_rows) {
515 "Wrong mat or storage size %d != %d", sm.size1(), nb_rows);
517 if (sm.size2() != nb_cols) {
519 "Wrong mat or storage size %d != %d", sm.size2(), nb_cols);
532 "Assembly type not implemented");
537 CHKERR asmb((*it)->getMat());
556 MOFEM_LOG(
"SELF", Sev::noisy) <<
"Schur assemble begin";
564 template <
typename OP_SCHUR_ASSEMBLE_BASE>
566 std::vector<std::string> fields_name,
567 std::vector<boost::shared_ptr<Range>> field_ents,
570 std::vector<bool> sym_schur, std::vector<double> diag_eps,
bool symm_op,
571 boost::shared_ptr<BlockStructure> diag_blocks)
572 :
OP(
NOSPACE,
OP::OPSPACE, symm_op), fieldsName(fields_name),
573 fieldEnts(field_ents), sequenceOfAOs(sequence_of_aos),
574 sequenceOfMats(sequence_of_mats), symSchur(sym_schur), diagEps(diag_eps),
575 diagBlocks(diag_blocks) {}
577 template <
typename OP_SCHUR_ASSEMBLE_BASE>
579 std::vector<std::string> fields_name,
580 std::vector<boost::shared_ptr<Range>> field_ents,
583 std::vector<bool> sym_schur,
bool symm_op,
584 boost::shared_ptr<BlockStructure> diag_blocks)
586 fields_name, field_ents, sequence_of_aos, sequence_of_mats, sym_schur,
587 std::vector<
double>(fields_name.size(), 0), symm_op, diag_blocks) {}
589 template <
typename OP_SCHUR_ASSEMBLE_BASE>
590 template <
typename I>
597 PetscLogEventBegin(SchurEvents::MOFEM_EVENT_opSchurAssembleEnd, 0, 0, 0, 0);
602 MOFEM_LOG(
"SELF", Sev::noisy) <<
"Schur assemble begin -> end";
606 auto get_field_name = [&](
auto uid) {
608 FieldEntity::getFieldBitNumberFromUniqueId(uid)));
615 for (
auto &s : storage) {
616 auto &
m = s->getMat();
618 auto &row_ind = s->getRowInd();
619 auto &col_ind = s->getColInd();
621 if (
auto ierr = this->assembleSchurMat(
623 M, s->uidRow, row_ind, s->uidCol, col_ind,
m, ADD_VALUES
627 auto field_ents = OP::getPtrFE()->mField.get_field_ents();
628 auto row_ent_it = field_ents->find(s->uidRow);
629 auto col_ent_it = field_ents->find(s->uidCol);
631 if (row_ent_it != field_ents->end())
633 <<
"Assemble row entity: " << (*row_ent_it)->getName() <<
" "
634 << (*col_ent_it)->getEntTypeName() <<
" side "
635 << (*row_ent_it)->getSideNumber();
636 if (col_ent_it != field_ents->end())
638 <<
"Assemble col entity: " << (*col_ent_it)->getName() <<
" "
639 << (*col_ent_it)->getEntTypeName() <<
" side "
640 << (*col_ent_it)->getSideNumber();
650 auto apply_schur = [&](
auto &storage,
652 auto lo_uid,
auto hi_uid,
654 double eps,
bool symm) {
658 auto add_off_mat = [&](
auto uid_row,
auto uid_col,
auto &row_ind,
659 auto &col_ind,
auto &offMatInvDiagOffMat) {
662 auto it = storage.template get<SchurElemMats::uid_mi_tag>().find(
663 boost::make_tuple(uid_row, uid_col));
665 if (it == storage.template get<SchurElemMats::uid_mi_tag>().end()) {
667 const auto size = SchurElemMats::locMats.size();
669 if (SchurElemMats::maxIndexCounter == size) {
671 SchurElemMats::rowIndices.push_back(
new VectorInt());
672 SchurElemMats::colIndices.push_back(
new VectorInt());
674 SchurElemMats::maxIndexCounter, uid_row, uid_col));
676 SchurElemMats::schurElemMats[SchurElemMats::maxIndexCounter].uidRow =
678 SchurElemMats::schurElemMats[SchurElemMats::maxIndexCounter].uidCol =
682 auto p = SchurElemMats::schurL2Storage.emplace(
683 &SchurElemMats::schurElemMats[SchurElemMats::maxIndexCounter++]);
691 auto &mat = (*p.first)->
getMat();
692 auto &set_row_ind = (*p.first)->getRowInd();
693 auto &set_col_ind = (*p.first)->getColInd();
695 set_row_ind.resize(row_ind.size(),
false);
696 noalias(set_row_ind) = row_ind;
697 set_col_ind.resize(col_ind.size(),
false);
698 noalias(set_col_ind) = col_ind;
700 mat.resize(offMatInvDiagOffMat.size1(), offMatInvDiagOffMat.size2(),
702 noalias(mat) = offMatInvDiagOffMat;
706 MOFEM_LOG(
"SELF", Sev::noisy) <<
"insert: " << get_field_name(uid_row)
707 <<
" " << get_field_name(uid_col) <<
" "
708 << mat.size1() <<
" " << mat.size2();
714 auto &mat = (*it)->getMat();
717 MOFEM_LOG(
"SELF", Sev::noisy) <<
"add: " << get_field_name(uid_row)
718 <<
" " << get_field_name(uid_col) <<
" "
719 << mat.size1() <<
" " << mat.size2();
724 if (mat.size1() != offMatInvDiagOffMat.size1()) {
726 "Wrong size %d != %d", mat.size1(),
727 offMatInvDiagOffMat.size1());
729 if (mat.size2() != offMatInvDiagOffMat.size2()) {
731 "Wrong size %d != %d", mat.size2(),
732 offMatInvDiagOffMat.size2());
735 mat += offMatInvDiagOffMat;
740 auto get_row_view = [&]() {
742 storage.template get<SchurElemMats::uid_mi_tag>().lower_bound(
743 boost::make_tuple(lo_uid, 0));
745 storage.template get<SchurElemMats::uid_mi_tag>().upper_bound(
748 std::vector<const SchurElemMats *> schur_row_ptr_view;
749 schur_row_ptr_view.reserve(std::distance(row_it, hi_row_it));
750 for (; row_it != hi_row_it; ++row_it) {
751 schur_row_ptr_view.push_back(*row_it);
753 return schur_row_ptr_view;
756 auto get_col_view = [&]() {
758 storage.template get<SchurElemMats::col_mi_tag>().lower_bound(lo_uid);
760 storage.template get<SchurElemMats::col_mi_tag>().upper_bound(hi_uid);
761 std::vector<const SchurElemMats *> schur_col_ptr_view;
762 schur_col_ptr_view.reserve(std::distance(col_it, hi_col_it));
763 for (; col_it != hi_col_it; ++col_it) {
764 schur_col_ptr_view.push_back(*col_it);
766 return schur_col_ptr_view;
769 auto schur_row_ptr_view = get_row_view();
770 auto schur_col_ptr_view = get_col_view();
773 for (
auto row_it : schur_row_ptr_view) {
775 if (row_it->uidRow == row_it->uidCol) {
779 <<
"invert: uid_row " << get_field_name(row_it->uidRow)
780 <<
" row uid " << get_field_name(row_it->uidCol) <<
" : "
781 << (*row_it)->getMat().size1() <<
" "
782 << (*row_it)->getMat().size2();
787 CHKERR I::invertMat(row_it, invMat,
eps);
790 for (
auto c_lo : schur_col_ptr_view) {
792 auto &uid_row = c_lo->uidRow;
793 if (uid_row == row_it->uidRow) {
797 auto &cc_off_mat = c_lo->getMat();
798 invDiagOffMat.resize(cc_off_mat.size1(), invMat.size2(),
false);
800 if (invMat.size1() != cc_off_mat.size2()) {
802 <<
"row_uid " << get_field_name(row_it->uidRow) <<
" row uid "
803 << get_field_name(uid_row);
805 "Wrong size %d != %d", invMat.size1(), cc_off_mat.size2());
811 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
812 cc_off_mat.size1(), invMat.size2(), cc_off_mat.size2(),
813 1., &*cc_off_mat.data().begin(), cc_off_mat.size2(),
814 &*invMat.data().begin(), invMat.size2(), 0.,
815 &*invDiagOffMat.data().begin(), invDiagOffMat.size2());
818 for (
auto r_lo : schur_row_ptr_view) {
820 auto &uid_col = r_lo->uidCol;
823 if (uid_col == row_it->uidRow) {
828 if (symm && uid_row > uid_col) {
832 auto &rr_off_mat = r_lo->getMat();
833 offMatInvDiagOffMat.resize(invDiagOffMat.size1(),
834 rr_off_mat.size2(),
false);
836 if (rr_off_mat.size1() != invDiagOffMat.size2()) {
838 <<
"uid_row " << get_field_name(row_it->uidRow)
839 <<
": col uid " << get_field_name(uid_col) <<
" row uid "
840 << get_field_name(uid_row);
842 "Wrong size %d != %d", rr_off_mat.size1(),
843 invDiagOffMat.size2());
849 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
850 invDiagOffMat.size1(), rr_off_mat.size2(),
851 invDiagOffMat.size2(), 1.,
852 &*invDiagOffMat.data().begin(), invDiagOffMat.size2(),
853 &*rr_off_mat.data().begin(), rr_off_mat.size2(), 0.,
854 &*offMatInvDiagOffMat.data().begin(),
855 offMatInvDiagOffMat.size2());
858 CHKERR add_off_mat(uid_row, uid_col, c_lo->getRowInd(),
859 r_lo->getColInd(), offMatInvDiagOffMat);
861 if (symm && uid_row != uid_col) {
862 transOffMatInvDiagOffMat.resize(offMatInvDiagOffMat.size2(),
863 offMatInvDiagOffMat.size1(),
865 noalias(transOffMatInvDiagOffMat) = trans(offMatInvDiagOffMat);
866 CHKERR add_off_mat(uid_col, uid_row, r_lo->getColInd(),
867 c_lo->getRowInd(), transOffMatInvDiagOffMat);
877 auto erase_factored = [&](
auto &storage,
auto lo_uid,
auto hi_uid) {
880 auto r_lo = storage.template get<SchurElemMats::uid_mi_tag>().lower_bound(
881 boost::make_tuple(lo_uid, 0));
882 auto r_hi = storage.template get<SchurElemMats::uid_mi_tag>().upper_bound(
883 boost::make_tuple(hi_uid,
885 storage.template get<SchurElemMats::uid_mi_tag>().erase(r_lo, r_hi);
888 storage.template get<SchurElemMats::col_mi_tag>().lower_bound(lo_uid);
890 storage.template get<SchurElemMats::col_mi_tag>().upper_bound(hi_uid);
891 storage.template get<SchurElemMats::col_mi_tag>().erase(c_lo, c_hi);
896 auto assemble_S = [&](
auto &storage,
auto ao,
auto mat) {
901 for (
auto &
m : storage) {
902 auto &ind_row =
m->getRowInd();
903 CHKERR AOApplicationToPetsc(ao, ind_row.size(), &*ind_row.begin());
904 auto &ind_col =
m->getColInd();
905 CHKERR AOApplicationToPetsc(ao, ind_col.size(), &*ind_col.begin());
911 CHKERR assemble_mat(mat, storage);
917 auto assemble_mat_a00_solver = [&](
auto &&list) {
920 if (!diagBlocks->dataInvBlocksPtr)
924 for (
auto lo : list) {
925 auto &
m = lo->getMat();
926 if (
m.size1() &&
m.size2()) {
927 auto row_ind = lo->getRowInd()[0];
928 auto col_ind = lo->getColInd()[0];
929 auto nb_rows =
m.size1();
930 auto nb_cols =
m.size2();
931 auto it = diagBlocks->blockIndex.get<1>().find(
932 boost::make_tuple(row_ind, col_ind, nb_rows, nb_cols));
933 if (it != diagBlocks->blockIndex.get<1>().end()) {
934 auto inv_shift = it->getInvShift();
935 if (inv_shift != -1) {
936 auto *ptr = &((*diagBlocks->dataInvBlocksPtr)[inv_shift]);
939 std::copy(
m.data().begin(),
m.data().end(), ptr);
947 auto assemble_inv_diag_a00_solver = [&](
auto &&list) {
950 if (!diagBlocks->dataInvBlocksPtr)
954 for (
auto lo : list) {
955 if (invMat.size1() && invMat.size2()) {
956 auto row_ind = lo->getRowInd()[0];
957 auto col_ind = lo->getColInd()[0];
958 auto nb_rows = invMat.size1();
959 auto nb_cols = invMat.size2();
960 auto it = diagBlocks->blockIndex.get<1>().find(
961 boost::make_tuple(row_ind, col_ind, nb_rows, nb_cols));
962 if (it != diagBlocks->blockIndex.get<1>().end()) {
963 auto inv_shift = it->getInvShift();
968 auto *ptr = &((*diagBlocks->dataInvBlocksPtr)[inv_shift]);
970 std::copy(invMat.data().begin(), invMat.data().end(), ptr);
977 auto gey_a00_diag_list = [&](
auto &storage,
auto lo_uid,
auto hi_uid) {
978 std::vector<const SchurElemMats *> list;
979 auto lo = storage.template get<SchurElemMats::uid_mi_tag>().lower_bound(
980 boost::make_tuple(lo_uid, 0));
981 auto hi = storage.template get<SchurElemMats::uid_mi_tag>().upper_bound(
982 boost::make_tuple(hi_uid,
984 list.reserve(std::distance(lo, hi));
985 for (
auto it = lo; it != hi; ++it) {
986 if ((*it)->getRowInd()[0] == (*it)->getColInd()[0]) {
993 auto get_a00_row_list = [&](
auto &storage,
auto lo_uid,
auto hi_uid) {
994 std::vector<const SchurElemMats *> list;
995 auto lo = storage.template get<SchurElemMats::uid_mi_tag>().lower_bound(
996 boost::make_tuple(lo_uid, 0));
997 auto hi = storage.template get<SchurElemMats::uid_mi_tag>().upper_bound(
998 boost::make_tuple(hi_uid,
1000 list.reserve(std::distance(lo, hi));
1001 for (
auto it = lo; it != hi; ++it) {
1002 if ((*it)->getRowInd()[0] != (*it)->getColInd()[0]) {
1003 list.push_back(*it);
1009 auto get_a00_col_list = [&](
auto &storage,
auto lo_uid,
auto hi_uid) {
1010 std::vector<const SchurElemMats *> list;
1012 storage.template get<SchurElemMats::col_mi_tag>().lower_bound(lo_uid);
1014 storage.template get<SchurElemMats::col_mi_tag>().upper_bound(hi_uid);
1015 list.reserve(std::distance(lo, hi));
1016 for (
auto it = lo; it != hi; ++it) {
1017 if ((*it)->getRowInd()[0] != (*it)->getColInd()[0]) {
1018 list.push_back(*it);
1024 auto get_a00_uids = [&]() {
1025 auto get_field_bit = [&](
auto &name) {
1026 return OP::getPtrFE()->mField.get_field_bit_number(name);
1029 std::vector<std::pair<UId, UId>> a00_uids;
1030 a00_uids.reserve(fieldsName.size());
1031 for (
auto ss = 0; ss != fieldsName.size(); ++ss) {
1032 auto field_bit = get_field_bit(fieldsName[ss]);
1033 auto row_ents = fieldEnts[ss];
1035 for (
auto p = row_ents->pair_begin(); p != row_ents->pair_end(); ++p) {
1037 FieldEntity::getLoLocalEntityBitNumber(field_bit, p->first);
1039 FieldEntity::getHiLocalEntityBitNumber(field_bit, p->second);
1040 a00_uids.push_back(std::make_pair(lo_uid, hi_uid));
1043 auto lo_uid = FieldEntity::getLoLocalEntityBitNumber(
1044 field_bit, get_id_for_min_type<MBVERTEX>());
1045 auto hi_uid = FieldEntity::getHiLocalEntityBitNumber(
1046 field_bit, get_id_for_max_type<MBENTITYSET>());
1047 a00_uids.push_back(std::make_pair(lo_uid, hi_uid));
1054 auto list_storage = [&](
auto &storage) {
1057 for (
auto &p : storage) {
1059 <<
"List schur storage: " <<
i <<
" " << p->iDX <<
": "
1060 << get_field_name(p->uidRow) <<
" " << get_field_name(p->uidCol)
1061 <<
" : " << p->getMat().size1() <<
" " << p->getMat().size2();
1068 auto assemble = [&](
auto &&a00_uids) {
1070 auto &storage = SchurElemMats::schurL2Storage;
1072 for (
auto &p : a00_uids) {
1073 auto [lo_uid, hi_uid] = p;
1076 list_storage(storage);
1078 <<
"Schur assemble: " << get_field_name(lo_uid) <<
" - "
1079 << get_field_name(hi_uid);
1082 CHKERR apply_schur(storage, lo_uid, hi_uid, diagEps[ss], symSchur[ss]);
1084 CHKERR assemble_mat_a00_solver(
1085 get_a00_row_list(storage, lo_uid, hi_uid));
1086 CHKERR assemble_mat_a00_solver(
1087 get_a00_col_list(storage, lo_uid, hi_uid));
1088 CHKERR assemble_inv_diag_a00_solver(
1089 gey_a00_diag_list(storage, lo_uid, hi_uid));
1091 CHKERR erase_factored(storage, lo_uid, hi_uid);
1092 CHKERR assemble_S(storage, sequenceOfAOs[ss], sequenceOfMats[ss]);
1099 CHKERR assemble(get_a00_uids());
1103 MOFEM_LOG(
"SELF", Sev::noisy) <<
"Schur assemble done";
1107 PetscLogEventEnd(SchurEvents::MOFEM_EVENT_opSchurAssembleEnd, 0, 0, 0, 0);
1122 const int nb =
m.size1();
1125 for (
int cc = 0; cc != nb; ++cc) {
1130 inv.resize(nb, nb,
false);
1132 auto ptr = &*inv.data().begin();
1133 for (
int c = 0;
c != nb; ++
c, ptr += nb + 1)
1135 ipiv.resize(nb,
false);
1136 lapack_work.resize(nb * nb,
false);
1138 lapack_dsysv(
'L', nb, nb, &*
m.data().begin(), nb, &*ipiv.begin(),
1139 &*inv.data().begin(), nb, &*lapack_work.begin(), nb * nb);
1142 "Can not invert matrix info = %d", info);
1155 const auto nb =
m.size1();
1157 if (nb !=
m.size2()) {
1159 "It should be square matrix %d != %d", nb,
m.size2());
1164 for (
int cc = 0; cc != nb; ++cc) {
1169 inv.resize(nb, nb,
false);
1171 auto ptr = &*inv.data().begin();
1172 for (
int c = 0;
c != nb; ++
c, ptr += nb + 1)
1174 ipiv.resize(nb,
false);
1175 const auto info =
lapack_dgesv(nb, nb, &*
m.data().begin(), nb,
1176 &*ipiv.begin(), &*inv.data().begin(), nb);
1179 "Can not invert matrix info = %d", info);
1187 return doWorkImpl<SchurDSYSV>(side,
type, data);
1193 return doWorkImpl<SchurDGESV>(side,
type, data);
1203 auto cmp_uid_lo = [](
const boost::weak_ptr<FieldEntity> &
a,
const UId &b) {
1204 if (
auto a_ptr =
a.lock()) {
1205 if (a_ptr->getLocalUniqueId() < b)
1214 auto cmp_uid_hi = [](
const UId &b,
const boost::weak_ptr<FieldEntity> &
a) {
1215 if (
auto a_ptr =
a.lock()) {
1216 if (b < a_ptr->getLocalUniqueId())
1226 auto get_uid_pair = [](
const auto &field_id) {
1227 auto lo_uid = FieldEntity::getLocalUniqueIdCalculate(
1228 field_id, get_id_for_min_type<MBVERTEX>());
1229 auto hi_uid = FieldEntity::getLocalUniqueIdCalculate(
1230 field_id, get_id_for_max_type<MBENTITYSET>());
1231 return std::make_pair(lo_uid, hi_uid);
1236 auto lo = std::lower_bound(field_ents.begin(), field_ents.end(),
1238 auto hi = std::upper_bound(field_ents.begin(), field_ents.end(),
1240 return std::make_pair(lo, hi);
1245 auto row_extractor = [](
auto &e) {
return e->entityCacheRowDofs; };
1246 auto col_extractor = [](
auto &e) {
return e->entityCacheColDofs; };
1248 auto extract_data = [](
auto &&its,
auto extractor) {
1249 std::vector<std::tuple<int, int, int>> data;
1250 data.reserve(std::distance(its.first, its.second));
1252 for (; its.first != its.second; ++its.first) {
1253 if (
auto e = its.first->lock()) {
1254 if (
auto cache = extractor(e).lock()) {
1255 auto nb_dofs = std::distance(cache->loHi[0], cache->loHi[1]);
1257 auto glob = (*cache->loHi[0])->getPetscGlobalDofIdx();
1258 auto loc = (*cache->loHi[0])->getPetscLocalDofIdx();
1259 data.emplace_back(glob, nb_dofs, loc);
1263 for (
auto lo = cache->loHi[0]; lo != cache->loHi[1]; ++lo) {
1264 auto glob = (*lo)->getPetscGlobalDofIdx();
1267 "Wrong global index");
1279 auto data_ptr = boost::make_shared<BlockStructure>();
1283 auto fe_method = boost::shared_ptr<MoFEM::FEMethod>(
new MoFEM::FEMethod());
1285 for (
auto &
d : schur_fe_op_vec) {
1288 auto get_bit_numbers = [&
d](
auto op) {
1289 std::vector<FieldBitNumber> bit_numbers(
d.second.size());
1295 auto row_bit_numbers = get_bit_numbers(
1296 [&](
auto &p) {
return m_field_ptr->get_field_bit_number(p.first); });
1298 auto col_bit_numbers = get_bit_numbers(
1299 [&](
auto &p) {
return m_field_ptr->get_field_bit_number(p.second); });
1301 fe_method->preProcessHook = []() {
return 0; };
1302 fe_method->postProcessHook = []() {
return 0; };
1303 fe_method->operatorHook = [&]() {
1306 for (
auto f = 0;
f != row_bit_numbers.size(); ++
f) {
1309 extract_data(get_it_pair(fe_method->getRowFieldEnts(),
1310 get_uid_pair(row_bit_numbers[
f])),
1313 extract_data(get_it_pair(fe_method->getColFieldEnts(),
1314 get_uid_pair(col_bit_numbers[
f])),
1317 for (
auto &
r : row_data) {
1318 auto [r_glob, r_nb_dofs, r_loc] =
r;
1319 for (
auto &
c : col_data) {
1320 auto [c_glob, c_nb_dofs, c_loc] =
c;
1321 if (r_glob != -1 && c_glob != -1) {
1323 r_glob, c_glob, r_nb_dofs, c_nb_dofs, r_loc, c_loc, -1, -1});
1333 m_field_ptr->get_comm_size());
1338 for (
auto &
v : data_ptr->blockIndex.get<0>()) {
1339 v.getMatShift() = mem_size;
1340 mem_size +=
v.getNbCols() *
v.getNbRows();
1343 std::vector<double> tmp;
1344 if (mem_size > tmp.max_size())
1347 data_ptr->dataBlocksPtr =
1348 boost::make_shared<std::vector<double>>(mem_size, 0);
1352 CHKERR VecSetDM(ghost_x, PETSC_NULL);
1353 CHKERR VecSetDM(ghost_y, PETSC_NULL);
1355 data_ptr->ghostX = ghost_x;
1356 data_ptr->ghostY = ghost_y;
1381 const PetscInt rows[], PetscScalar diag,
1386 CHKERR MatShellGetContext(
A, (
void **)&ctx);
1388 PetscLogEventBegin(SchurEvents::MOFEM_EVENT_zeroRowsAndCols, 0, 0, 0, 0);
1390 const int *ptr = &rows[0];
1395 CHKERR PetscObjectGetComm((PetscObject)
A, &comm);
1397 MPI_Comm_size(comm, &size);
1403 CHKERR ISGetSize(is_local, &is_nb_rows);
1406 CHKERR ISView(is_local, PETSC_VIEWER_STDOUT_WORLD);
1409 CHKERR ISGetIndices(is_local, &ptr);
1413 CHKERR MatGetLocalSize(
A, &loc_m, &loc_n);
1415 for (
auto n = 0;
n != is_nb_rows; ++
n) {
1417 auto rlo = ctx->blockIndex.get<2>().lower_bound(row);
1418 auto rhi = ctx->blockIndex.get<2>().end();
1419 for (; rlo != rhi; ++rlo) {
1420 auto r_shift = row - rlo->getRow();
1421 if (r_shift >= 0 && r_shift < rlo->getNbRows()) {
1422 auto *ptr = &(*ctx->dataBlocksPtr)[rlo->getMatShift()];
1423 for (
auto i = 0;
i != rlo->getNbCols(); ++
i) {
1424 ptr[
i + r_shift * rlo->getNbCols()] = 0;
1426 }
else if (rlo->getRow() + rlo->getNbRows() > row) {
1432 for (
auto n = 0;
n != is_nb_rows; ++
n) {
1434 auto clo = ctx->blockIndex.get<3>().lower_bound(col);
1435 auto chi = ctx->blockIndex.get<3>().end();
1436 for (; clo != chi; ++clo) {
1437 auto c_shift = col - clo->getCol();
1438 if (c_shift >= 0 && c_shift < clo->getNbCols()) {
1440 auto *ptr = &(*ctx->dataBlocksPtr)[clo->getMatShift()];
1441 for (
auto i = 0;
i != clo->getNbRows(); ++
i) {
1442 ptr[c_shift +
i * clo->getNbCols()] = 0;
1448 clo->getRow() == clo->getCol() && clo->getLocRow() < loc_m &&
1449 clo->getLocCol() < loc_n
1452 auto r_shift = col - clo->getCol();
1453 if (r_shift >= 0 && r_shift < clo->getNbRows()) {
1454 ptr[c_shift + r_shift * clo->getNbCols()] = diag;
1457 }
else if (clo->getCol() + clo->getNbCols() > col) {
1464 CHKERR ISRestoreIndices(is_local, &ptr);
1467 PetscLogEventEnd(SchurEvents::MOFEM_EVENT_zeroRowsAndCols, 0, 0, 0, 0);
1475 CHKERR MatShellGetContext(
m, (
void **)&ctx);
1476 if (ctx->dataBlocksPtr)
1477 std::fill(ctx->dataBlocksPtr->begin(), ctx->dataBlocksPtr->end(), 0.);
1478 if (ctx->dataInvBlocksPtr)
1479 std::fill(ctx->dataInvBlocksPtr->begin(), ctx->dataInvBlocksPtr->end(), 0.);
1480 if (ctx->preconditionerBlocksPtr)
1481 std::fill(ctx->preconditionerBlocksPtr->begin(),
1482 ctx->preconditionerBlocksPtr->end(), 0.);
1488 CHKERR MatShellSetManageScalingShifts(mat_raw);
1489 CHKERR MatShellSetOperation(mat_raw, MATOP_MULT, (
void (*)(
void))
mult);
1490 CHKERR MatShellSetOperation(mat_raw, MATOP_MULT_ADD,
1492 CHKERR MatShellSetOperation(mat_raw, MATOP_SOLVE, (
void (*)(
void))
solve);
1493 CHKERR MatShellSetOperation(mat_raw, MATOP_SOLVE_ADD,
1495 CHKERR MatShellSetOperation(mat_raw, MATOP_ZERO_ENTRIES,
1497 CHKERR MatShellSetOperation(mat_raw, MATOP_ZERO_ROWS_COLUMNS,
1504 boost::shared_ptr<BlockStructure> data) {
1507 auto nb_local = problem_ptr->nbLocDofsRow;
1508 auto nb_global = problem_ptr->nbDofsRow;
1511 if (nb_local != problem_ptr->nbLocDofsCol) {
1513 <<
"Wrong size " << nb_local <<
" != " << problem_ptr->nbLocDofsCol;
1515 "nb. cols is inconsistent with nb. rows");
1517 if (nb_global != problem_ptr->nbDofsCol) {
1519 <<
"Wrong size " << nb_global <<
" != " << problem_ptr->nbDofsCol;
1521 "nb. cols is inconsistent with nb. rows");
1526 CHKERR PetscObjectGetComm((PetscObject)dm, &comm);
1529 CHKERR MatCreateShell(comm, nb_local, nb_local, nb_global, nb_global,
1530 (
void *)data.get(), &mat_raw);
1543 CHKERR MatShellGetContext(mat, (
void **)&ctx);
1545 PetscLogEventBegin(SchurEvents::MOFEM_EVENT_BlockStructureMult, 0, 0, 0, 0);
1548 CHKERR VecGetLocalSize(x, &x_loc_size);
1550 CHKERR VecGetLocalSize(y, &y_loc_size);
1552 Vec ghost_x = ctx->ghostX;
1553 Vec ghost_y = ctx->ghostY;
1555 CHKERR VecCopy(x, ghost_x);
1559 CHKERR VecGhostGetLocalForm(ghost_y, &loc_ghost_y);
1561 CHKERR VecGetLocalSize(loc_ghost_y, &nb_y);
1562 CHKERR VecGetArray(loc_ghost_y, &y_array);
1563 for (
auto i = 0;
i != nb_y; ++
i)
1565 CHKERR VecRestoreArray(loc_ghost_y, &y_array);
1566 CHKERR VecGhostRestoreLocalForm(ghost_y, &loc_ghost_y);
1568 auto mult = [&](
int low_x,
int hi_x,
int low_y,
int hi_y) {
1573 CHKERR VecGhostGetLocalForm(ghost_x, &loc_ghost_x);
1574 CHKERR VecGetArray(loc_ghost_x, &x_array);
1578 CHKERR VecGhostGetLocalForm(ghost_y, &loc_ghost_y);
1580 CHKERR VecGetLocalSize(loc_ghost_y, &nb_y);
1581 CHKERR VecGetArray(loc_ghost_y, &y_array);
1583 double *block_ptr = &*ctx->dataBlocksPtr->begin();
1584 auto it = ctx->blockIndex.get<0>().lower_bound(0);
1585 auto hi = ctx->blockIndex.get<0>().end();
1588 if (it->getLocRow() < low_y || it->getLocRow() >= hi_y ||
1589 it->getLocCol() < low_x || it->getLocCol() >= hi_x) {
1594 auto nb_rows = it->getNbRows();
1595 auto nb_cols = it->getNbCols();
1596 auto x_ptr = &x_array[it->getLocCol()];
1597 auto y_ptr = &y_array[it->getLocRow()];
1598 auto ptr = &block_ptr[it->getMatShift()];
1601 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, nb_cols, 1.0, ptr,
1602 nb_cols, x_ptr, 1, 1.0, y_ptr, 1);
1604 for (
auto r = 0;
r != nb_rows; ++
r) {
1605 for (
auto c = 0;
c != nb_cols; ++
c) {
1606 y_ptr[
r] += ptr[
r * nb_cols +
c] * x_ptr[
c];
1613 if (ctx->multiplyByPreconditioner) {
1615 if (!ctx->preconditionerBlocksPtr)
1617 "No parentBlockStructurePtr");
1619 auto preconditioner_ptr = &*ctx->preconditionerBlocksPtr->begin();
1621 auto it = ctx->blockIndex.get<0>().lower_bound(0);
1622 auto hi = ctx->blockIndex.get<0>().end();
1625 if (it->getLocRow() < low_y || it->getLocRow() >= hi_y ||
1626 it->getLocCol() < low_x || it->getLocCol() >= hi_x) {
1631 if (it->getInvShift() != -1) {
1632 auto nb_rows = it->getNbRows();
1633 auto nb_cols = it->getNbCols();
1634 auto x_ptr = &x_array[it->getLocCol()];
1635 auto y_ptr = &y_array[it->getLocRow()];
1636 auto ptr = &preconditioner_ptr[it->getInvShift()];
1638 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, nb_cols, 1.0, ptr,
1639 nb_cols, x_ptr, 1, 1.0, y_ptr, 1);
1641 for (
auto r = 0;
r != nb_rows; ++
r) {
1642 for (
auto c = 0;
c != nb_cols; ++
c) {
1643 y_ptr[
r] += ptr[
r * nb_cols +
c] * x_ptr[
c];
1653 CHKERR VecRestoreArray(loc_ghost_x, &x_array);
1654 CHKERR VecRestoreArray(loc_ghost_y, &y_array);
1655 CHKERR VecGhostRestoreLocalForm(ghost_x, &loc_ghost_x);
1656 CHKERR VecGhostRestoreLocalForm(ghost_y, &loc_ghost_y);
1660 constexpr
auto max_int = std::numeric_limits<int>::max();
1661 CHKERR VecGhostUpdateBegin(ghost_x, INSERT_VALUES, SCATTER_FORWARD);
1663 CHKERR VecGhostUpdateEnd(ghost_x, INSERT_VALUES, SCATTER_FORWARD);
1664 CHKERR mult(x_loc_size, max_int, 0, max_int);
1666 CHKERR VecGhostUpdateBegin(ghost_y, ADD_VALUES, SCATTER_REVERSE);
1667 CHKERR VecGhostUpdateEnd(ghost_y, ADD_VALUES, SCATTER_REVERSE);
1671 CHKERR VecCopy(ghost_y, y);
1674 CHKERR VecAXPY(y, 1., ghost_y);
1682 auto print_norm = [&](
auto msg,
auto y) {
1685 CHKERR VecNorm(y, NORM_2, &norm);
1687 CHKERR VecGetLocalSize(y, &nb_loc_y);
1689 CHKERR VecGetSize(y, &nb_y);
1691 << msg <<
" " << nb_y <<
" " << nb_loc_y <<
" norm " << norm;
1697 print_norm(
"mult_schur_block_shell insert x", x);
1698 print_norm(
"mult_schur_block_shell insert y", y);
1701 print_norm(
"mult_schur_block_shell add x", x);
1702 print_norm(
"mult_schur_block_shell add y", y);
1711 PetscLogEventEnd(SchurEvents::MOFEM_EVENT_BlockStructureMult, 0, 0, 0, 0);
1720 CHKERR MatShellGetContext(mat, (
void **)&ctx);
1722 PetscLogEventBegin(SchurEvents::MOFEM_EVENT_BlockStructureSolve, 0, 0, 0, 0);
1725 Vec ghost_x = ctx->ghostY;
1726 Vec ghost_y = ctx->ghostX;
1728 CHKERR VecCopy(y, ghost_y);
1729 CHKERR VecZeroEntries(ghost_x);
1736 CHKERR VecGhostGetLocalForm(ghost_x, &loc_ghost_x);
1737 CHKERR VecGetArray(loc_ghost_x, &x_array);
1740 CHKERR VecGhostGetLocalForm(ghost_y, &loc_ghost_y);
1743 CHKERR VecZeroEntries(inv_y);
1744 CHKERR VecCopy(loc_ghost_y, add_y);
1745 double *y_inv_array;
1746 CHKERR VecGetArray(inv_y, &y_inv_array);
1747 double *y_add_array;
1748 CHKERR VecGetArray(add_y, &y_add_array);
1750 CHKERR VecGetArray(loc_ghost_y, &y_array);
1752 auto data_inv_blocks = ctx->dataInvBlocksPtr;
1753 auto inv_block_ptr = &*data_inv_blocks->begin();
1754 auto data_blocks = ctx->dataBlocksPtr;
1759 std::vector<double>
f;
1762 for (
auto s1 = 0; s1 != index_view->diagUpRange.size() - 1; ++s1) {
1763 auto lo = index_view->diagUpRange[s1];
1764 auto hi = index_view->diagUpRange[s1 + 1];
1767 auto diag_index_ptr =
1768 index_view->upView[lo];
1771 auto row = diag_index_ptr->getLocRow();
1772 auto col = diag_index_ptr->getLocCol();
1773 auto nb_rows = diag_index_ptr->getNbRows();
1774 auto nb_cols = diag_index_ptr->getNbCols();
1775 auto inv_shift = diag_index_ptr->getInvShift();
1777 for (; lo != hi; ++lo) {
1778 auto off_index_ptr = index_view->upView[lo];
1779 auto off_col = off_index_ptr->getLocCol();
1780 auto off_nb_cols = off_index_ptr->getNbCols();
1781 auto off_shift = off_index_ptr->getInvShift();
1783 if (off_shift == -1)
1786 auto ptr = &inv_block_ptr[off_shift];
1788 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, off_nb_cols, -1.0,
1789 ptr, off_nb_cols, &y_inv_array[off_col], 1, 1.0,
1790 &y_add_array[row], 1);
1792 for (
auto r = 0;
r != nb_rows; ++
r) {
1793 for (
auto c = 0;
c != off_nb_cols; ++
c) {
1794 y_add_array[row +
r] -=
1795 ptr[
r * off_nb_cols +
c] * y_inv_array[off_col +
c];
1802 if (inv_shift == -1)
1804 if (inv_shift + nb_rows * nb_cols > data_inv_blocks->size())
1806 "inv_shift out of range %d > %d", inv_shift + nb_rows * nb_cols,
1807 data_inv_blocks->size());
1810 auto ptr = &inv_block_ptr[inv_shift];
1812 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, nb_cols, -1.0, ptr,
1813 nb_cols, &y_add_array[col], 1, 1.0, &y_inv_array[row], 1);
1815 for (
auto r = 0;
r != nb_rows; ++
r) {
1816 for (
auto c = 0;
c != nb_cols; ++
c) {
1817 y_inv_array[row +
r] -= ptr[
r * nb_cols +
c] * y_add_array[col +
c];
1824 for (
auto s1 = 0; s1 != index_view->diagLoRange.size() - 1; ++s1) {
1825 auto lo = index_view->diagLoRange[s1];
1826 auto hi = index_view->diagLoRange[s1 + 1];
1829 auto diag_index_ptr =
1830 index_view->lowView[lo];
1833 auto row = diag_index_ptr->getLocRow();
1834 auto col = diag_index_ptr->getLocCol();
1835 auto nb_rows = diag_index_ptr->getNbRows();
1836 auto nb_cols = diag_index_ptr->getNbCols();
1837 auto inv_shift = diag_index_ptr->getInvShift();
1840 std::copy(&y_add_array[col], &y_add_array[col + nb_cols],
f.begin());
1842 for (; lo != hi; ++lo) {
1843 auto off_index_ptr = index_view->lowView[lo];
1844 auto off_col = off_index_ptr->getLocCol();
1845 auto off_nb_cols = off_index_ptr->getNbCols();
1846 auto off_shift = off_index_ptr->getInvShift();
1848 if (off_shift == -1)
1851 auto x_ptr = &x_array[off_col];
1852 auto ptr = &inv_block_ptr[off_shift];
1854 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, off_nb_cols, -1.0,
1855 ptr, off_nb_cols, x_ptr, 1, 1.0, &
f[0], 1);
1857 for (
auto r = 0;
r != nb_rows; ++
r) {
1858 for (
auto c = 0;
c != off_nb_cols; ++
c) {
1859 f[
r] -= ptr[
r * off_nb_cols +
c] * x_ptr[
c];
1866 if (inv_shift == -1)
1868 if (inv_shift + nb_rows * nb_cols > data_inv_blocks->size())
1870 "inv_shift out of range %d > %d", inv_shift + nb_rows * nb_cols,
1871 data_inv_blocks->size());
1874 auto ptr = &inv_block_ptr[inv_shift];
1876 cblas_dgemv(CblasRowMajor, CblasNoTrans, nb_rows, nb_cols, -1.0, ptr,
1877 nb_cols, &
f[0], 1, 1.0, &x_array[row], 1);
1879 for (
auto r = 0;
r != nb_rows; ++
r) {
1880 for (
auto c = 0;
c != nb_cols; ++
c) {
1881 x_array[row +
r] -= ptr[
r * nb_cols +
c] *
f[
c];
1887 CHKERR VecRestoreArray(loc_ghost_x, &x_array);
1888 CHKERR VecRestoreArray(inv_y, &y_inv_array);
1889 CHKERR VecRestoreArray(add_y, &y_add_array);
1890 CHKERR VecRestoreArray(loc_ghost_y, &y_array);
1891 CHKERR VecGhostRestoreLocalForm(ghost_x, &loc_ghost_x);
1892 CHKERR VecGhostRestoreLocalForm(ghost_y, &loc_ghost_y);
1899 CHKERR VecCopy(ghost_x, x);
1902 CHKERR VecAXPY(x, 1., ghost_x);
1910 auto print_norm = [&](
auto msg,
auto y) {
1913 CHKERR VecNorm(y, NORM_2, &norm);
1915 CHKERR VecGetLocalSize(y, &nb_loc_y);
1917 CHKERR VecGetSize(y, &nb_y);
1919 << msg <<
" " << nb_y <<
" " << nb_loc_y <<
" norm " << norm;
1925 print_norm(
"solve_schur_block_shell insert x", x);
1926 print_norm(
"solve_schur_block_shell insert y", y);
1929 print_norm(
"solve_schur_block_shell add x", x);
1930 print_norm(
"solve_schur_block_shell add y", y);
1939 PetscLogEventEnd(SchurEvents::MOFEM_EVENT_BlockStructureSolve, 0, 0, 0, 0);
1949 boost::shared_ptr<std::vector<double>> data_blocks_ptr) {
1960 PetscLogEventBegin(SchurEvents::MOFEM_EVENT_BlockStructureSetValues, 0, 0, 0,
1965 auto get_row_data = [&]() -> std::pair<bool, VectorInt> {
1967 if (
auto stored_data_ptr =
1969 return std::make_pair(
true, stored_data_ptr->entityIndices);
1972 return std::make_pair(
false, row_data.
getIndices());
1975 auto row_indices = get_row_data();
1976 auto it_r = std::find(row_indices.second.begin(), row_indices.second.end(),
1977 -1) == row_indices.second.end();
1987 row_indices.second.size()
1998 boost::make_tuple(row_indices.second[0], col_data.
getIndices()[0],
1999 row_indices.second.size(),
2009 <<
"missing block: row " << row_data.
getFieldDofs()[0]->getName()
2016 auto shift = shift_extractor(&*it);
2019 auto nbr = row_indices.second.size();
2021 auto mat_row_ptr = &mat(0, 0);
2022 auto s_mat = &(*data_blocks_ptr)[shift];
2023 if (iora == ADD_VALUES) {
2024 for (
int r = 0;
r != nbr; ++
r) {
2025 for (
int c = 0;
c != nbc; ++
c) {
2026 *s_mat += *mat_row_ptr;
2032 for (
int r = 0;
r != nbr; ++
r) {
2033 for (
int c = 0;
c != nbc; ++
c) {
2034 *s_mat = *mat_row_ptr;
2047 std::vector<int> row_ent_idx;
2048 std::vector<int> col_ent_idx;
2050 auto get_ent_idx = [&](
auto lo,
auto hi,
auto &idx) {
2052 idx.reserve(std::distance(lo, hi));
2053 for (; lo != hi; ++lo) {
2054 idx.emplace_back((*lo)->getEntDofIdx());
2060 if (
auto r_cache = rent->entityCacheRowDofs.lock()) {
2062 auto rlo = r_cache->loHi[0];
2063 auto rhi = r_cache->loHi[1];
2067 get_ent_idx(rlo, rhi, row_ent_idx);
2068 auto gr = (*rlo)->getPetscGlobalDofIdx();
2069 auto nbr = std::distance(rlo, rhi);
2073 if (
auto c_cache = cent->entityCacheColDofs.lock()) {
2075 auto clo = c_cache->loHi[0];
2076 auto chi = c_cache->loHi[1];
2080 auto nbc = std::distance(clo, chi);
2083 boost::make_tuple(gr, (*clo)->getPetscGlobalDofIdx(), nbr, nbc)
2092 <<
"missing block: " << row_data.
getFieldDofs()[0]->getName()
2095 "Block not allocated");
2100 auto shift = shift_extractor(&*it);
2103 auto mat_row_ptr0 = &mat(row, col);
2104 auto s_mat = &(*data_blocks_ptr)[shift];
2108 nbr == rent->getNbDofsOnEnt() && nbc == cent->getNbDofsOnEnt()
2114 if (iora == ADD_VALUES) {
2115 for (
int r = 0;
r != nbr; ++
r) {
2116 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2117 for (
int c = 0;
c != nbc; ++
c) {
2118 *s_mat += *mat_row_ptr;
2124 for (
int r = 0;
r != nbr; ++
r) {
2125 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2126 for (
int c = 0;
c != nbc; ++
c) {
2127 *s_mat = *mat_row_ptr;
2136 nbc == cent->getNbDofsOnEnt()
2142 if (iora == ADD_VALUES) {
2143 for (
auto r : row_ent_idx) {
2144 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2145 for (
int c = 0;
c != nbc; ++
c) {
2146 *s_mat += *mat_row_ptr;
2152 for (
auto r : row_ent_idx) {
2153 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2154 for (
int c = 0;
c != nbc; ++
c) {
2155 *s_mat = *mat_row_ptr;
2164 get_ent_idx(clo, chi, col_ent_idx);
2165 auto row_idx = &row_indices.second[row];
2167 if (iora == ADD_VALUES) {
2168 for (
auto r : row_ent_idx) {
2169 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2170 for (
auto c : col_ent_idx) {
2171 if (row_idx[
r] != -1 && col_idx[
c] != -1)
2172 *s_mat += mat_row_ptr[
c];
2177 for (
auto r : row_ent_idx) {
2178 auto mat_row_ptr = mat_row_ptr0 +
r * mat.size2();
2179 for (
auto c : col_ent_idx) {
2180 if (row_idx[
r] != -1 && col_idx[
c] != -1)
2181 *s_mat = mat_row_ptr[
c];
2189 col += cent->getNbDofsOnEnt();
2192 row += rent->getNbDofsOnEnt();
2197 PetscLogEventEnd(SchurEvents::MOFEM_EVENT_BlockStructureSetValues, 0, 0, 0,
2209 PetscBool is_mat_shell = PETSC_FALSE;
2210 PetscObjectTypeCompare((PetscObject)
M, MATSHELL, &is_mat_shell);
2213 CHKERR MatShellGetContext(
M, (
void **)&ctx);
2215 ctx, row_data, col_data, mat, iora,
2217 ctx->dataBlocksPtr);
2219 CHKERR MatSetValues<AssemblyTypeSelector<PETSC>>(
M, row_data, col_data, mat,
2230 PetscBool is_mat_shell = PETSC_FALSE;
2231 PetscObjectTypeCompare((PetscObject)
M, MATSHELL, &is_mat_shell);
2234 CHKERR MatShellGetContext(
M, (
void **)&ctx);
2235 if (!ctx->preconditionerBlocksPtr)
2237 "No preconditionerBlocksPtr");
2239 ctx, row_data, col_data, mat, iora,
2241 ctx->preconditionerBlocksPtr);
2243 CHKERR MatSetValues<AssemblyTypeSelector<PETSC>>(
M, row_data, col_data, mat,
2252 boost::shared_ptr<BlockStructure> block_mat_data_ptr,
2254 std::vector<std::string> fields_names,
2255 std::vector<boost::shared_ptr<Range>>
2257 bool add_preconditioner_block) {
2259 if (!block_mat_data_ptr) {
2263 if(fields_names.size() != field_ents.size())
2265 "fields_names.size() != field_ents.size()");
2267 auto [schur_dm, block_dm] = dms;
2272 auto schur_dofs_row = schur_prb->getNumeredRowDofsPtr();
2273 auto schur_dofs_col = schur_prb->getNumeredColDofsPtr();
2274 auto block_dofs_row = block_prb->getNumeredRowDofsPtr();
2275 auto block_dofs_col = block_prb->getNumeredColDofsPtr();
2277 auto ao_schur_row = schur_prb->getSubData()->getSmartRowMap();
2278 auto ao_schur_col = schur_prb->getSubData()->getSmartColMap();
2279 auto ao_block_row = block_prb->getSubData()->getSmartRowMap();
2280 auto ao_block_col = block_prb->getSubData()->getSmartColMap();
2286 CHKERR VecSetDM(schur_vec_x, PETSC_NULL);
2287 CHKERR VecSetDM(block_vec_x, PETSC_NULL);
2288 CHKERR VecSetDM(schur_vec_y, PETSC_NULL);
2289 CHKERR VecSetDM(block_vec_y, PETSC_NULL);
2291 auto get_vec = [&](
auto schur_data) {
2292 std::vector<int> vec_r, vec_c;
2293 vec_r.reserve(schur_data->blockIndex.size());
2294 vec_c.reserve(schur_data->blockIndex.size());
2295 for (
auto &
d : schur_data->blockIndex.template get<1>()) {
2296 vec_r.push_back(
d.getRow());
2297 vec_c.push_back(
d.getCol());
2299 return std::make_pair(vec_r, vec_c);
2302 auto [vec_r_schur, vec_c_schur] = get_vec(block_mat_data_ptr);
2303 CHKERR AOApplicationToPetsc(ao_schur_row, vec_r_schur.size(),
2304 &*vec_r_schur.begin());
2305 CHKERR AOApplicationToPetsc(ao_schur_col, vec_c_schur.size(),
2306 &*vec_c_schur.begin());
2307 auto [vec_r_block, vec_c_block] = get_vec(block_mat_data_ptr);
2308 CHKERR AOApplicationToPetsc(ao_block_row, vec_r_block.size(),
2309 &*vec_r_block.begin());
2310 CHKERR AOApplicationToPetsc(ao_block_col, vec_c_block.size(),
2311 &*vec_c_block.begin());
2313 std::array<boost::shared_ptr<BlockStructure>, 4> data_ptrs;
2315 for (
auto r = 0;
r != 3; ++
r) {
2316 data_ptrs[
r] = boost::make_shared<BlockStructure>();
2317 data_ptrs[
r]->dataBlocksPtr = block_mat_data_ptr->dataBlocksPtr;
2319 data_ptrs[3] = boost::make_shared<DiagBlockInvStruture>();
2320 data_ptrs[3]->dataBlocksPtr = block_mat_data_ptr->dataBlocksPtr;
2322 data_ptrs[0]->ghostX = schur_vec_x;
2323 data_ptrs[0]->ghostY = schur_vec_y;
2324 data_ptrs[1]->ghostX = block_vec_x;
2325 data_ptrs[1]->ghostY = schur_vec_y;
2326 data_ptrs[2]->ghostX = schur_vec_x;
2327 data_ptrs[2]->ghostY = block_vec_y;
2328 data_ptrs[3]->ghostX = block_vec_x;
2329 data_ptrs[3]->ghostY = block_vec_y;
2332 for (
auto &
d : block_mat_data_ptr->blockIndex.get<1>()) {
2334 auto insert = [&](
auto &
m,
auto &dof_r,
auto &dof_c,
auto &
d) {
2338 dof_r->getPetscGlobalDofIdx(), dof_c->getPetscGlobalDofIdx(),
2340 d.getNbRows(),
d.getNbCols(),
2342 dof_r->getPetscLocalDofIdx(), dof_c->getPetscLocalDofIdx(),
2344 d.getMatShift(),
d.getInvShift()}
2349 if (vec_r_schur[idx] != -1 && vec_c_schur[idx] != -1) {
2362 insert(data_ptrs[0]->blockIndex, *schur_dof_r, *schur_dof_c,
d);
2365 if (vec_r_schur[idx] != -1 && vec_c_block[idx] != -1) {
2378 insert(data_ptrs[1]->blockIndex, *schur_dof_r, *block_dof_c,
d);
2381 if (vec_r_block[idx] != -1 && vec_c_schur[idx] != -1) {
2394 insert(data_ptrs[2]->blockIndex, *block_dof_r, *schur_dof_c,
d);
2397 if (vec_r_block[idx] != -1 && vec_c_block[idx] != -1) {
2402 insert(data_ptrs[3]->blockIndex, *block_dof_r, *block_dof_c,
d);
2409 auto set_up_a00_data = [&](
auto inv_block_data) {
2412 auto get_forward_list = [&]() {
2413 std::vector<int> list;
2414 list.reserve(fields_names.size());
2415 for (
auto s = 0; s != fields_names.size(); ++s) {
2421 auto get_reverse_list = [&]() {
2422 std::vector<int> list;
2423 list.reserve(fields_names.size());
2424 for (
auto s = 0; s != fields_names.size(); ++s) {
2425 list.push_back(fields_names.size() - s - 1);
2431 auto get_a00_uids = [&](
auto &&list) {
2433 return m_field_ptr->get_field_bit_number(
field_name);
2436 std::vector<std::pair<UId, UId>> a00_uids;
2437 a00_uids.reserve(fields_names.size());
2438 for (
auto ss : list) {
2439 auto field_bit = get_field_bit(fields_names[ss]);
2440 auto row_ents = field_ents[ss];
2442 for (
auto p = row_ents->pair_begin(); p != row_ents->pair_end();
2445 FieldEntity::getLoLocalEntityBitNumber(field_bit, p->first);
2447 FieldEntity::getHiLocalEntityBitNumber(field_bit, p->second);
2448 a00_uids.push_back(std::make_pair(lo_uid, hi_uid));
2451 auto lo_uid = FieldEntity::getLoLocalEntityBitNumber(
2452 field_bit, get_id_for_min_type<MBVERTEX>());
2453 auto hi_uid = FieldEntity::getHiLocalEntityBitNumber(
2454 field_bit, get_id_for_max_type<MBENTITYSET>());
2455 a00_uids.push_back(std::make_pair(lo_uid, hi_uid));
2462 auto get_glob_idex_pairs = [&](
auto &&uid_pairs) {
2463 std::vector<std::pair<int, int>> glob_idex_pairs;
2464 glob_idex_pairs.reserve(uid_pairs.size());
2465 auto dofs = block_prb->getNumeredRowDofsPtr();
2467 auto it = uid_pairs.rbegin();
2468 auto hi = uid_pairs.rend();
2470 for (; it != hi; ++it) {
2471 auto [lo_uid, hi_uid] = *it;
2472 auto lo_it = dofs->lower_bound(lo_uid);
2473 auto hi_it = dofs->upper_bound(hi_uid);
2474 if (lo_it != hi_it) {
2475 auto lo_idx = (*lo_it)->getPetscGlobalDofIdx();
2476 glob_idex_pairs.emplace_back(lo_idx, std::distance(lo_it, hi_it));
2480 return glob_idex_pairs;
2484 boost::dynamic_pointer_cast<DiagBlockInvStruture>(inv_block_data)
2488 index_view.lowView.resize(0);
2489 index_view.lowView.reserve(inv_block_data->blockIndex.size());
2490 index_view.diagLoRange.resize(0);
2491 index_view.diagLoRange.reserve(inv_block_data->blockIndex.size() + 1);
2492 index_view.diagLoRange.push_back(0);
2494 index_view.upView.resize(0);
2495 index_view.upView.reserve(inv_block_data->blockIndex.size());
2496 index_view.diagUpRange.resize(0);
2497 index_view.diagUpRange.reserve(inv_block_data->blockIndex.size() + 1);
2498 index_view.diagUpRange.push_back(0);
2501 using BlockIndexView = multi_index_container<
2508 &DiagBlockIndex::Indexes::getRow>>
2512 BlockIndexView block_index_view;
2513 for (
auto it = inv_block_data->blockIndex.template get<1>().begin();
2514 it != inv_block_data->blockIndex.template get<1>().end(); ++it) {
2515 block_index_view.insert(&*it);
2518 auto set_index_view_data = [&](
2522 auto &&glob_idx_pairs,
2524 auto &view,
auto &range
2528 for (
auto s1 = start; s1 < glob_idx_pairs.size(); ++s1) {
2532 auto [lo_idx, nb] = glob_idx_pairs[s1];
2533 auto it = block_index_view.lower_bound(lo_idx);
2534 auto hi = block_index_view.end();
2538 ((*it)->getRow() + (*it)->getNbRows()) <= lo_idx + nb) {
2540 auto row = (*it)->getRow();
2542 auto get_diag_index =
2544 while (it != hi && (*it)->getRow() == row) {
2545 if ((*it)->getCol() == row &&
2546 (*it)->getNbCols() == (*it)->getNbRows()) {
2556 auto push_off_diag = [&](
auto it,
auto s1) {
2557 while (it != hi && (*it)->getRow() == row) {
2558 for (
int s2 = 0; s2 < s1; ++s2) {
2559 auto [col_lo_idx, col_nb] = glob_idx_pairs[s2];
2560 if ((*it)->getCol() >= col_lo_idx &&
2561 (*it)->getCol() + (*it)->getNbCols() <=
2562 col_lo_idx + col_nb) {
2563 view.push_back(*it);
2570 view.push_back(get_diag_index(it));
2571 push_off_diag(it, s1);
2572 range.push_back(view.size());
2574 while (it != hi && (*it)->getRow() == row) {
2581 set_index_view_data(
2585 get_glob_idex_pairs(get_a00_uids(get_forward_list())),
2587 index_view.lowView, index_view.diagLoRange
2591 set_index_view_data(
2595 get_glob_idex_pairs(get_a00_uids(get_reverse_list())),
2597 index_view.upView, index_view.diagUpRange
2602 auto set_inv_mat = [&](
auto inv_mem_size,
auto &view,
auto &range) {
2603 auto get_vec = [&](
auto &view) {
2604 std::vector<int> vec_r, vec_c;
2605 vec_r.reserve(view.size());
2606 vec_c.reserve(view.size());
2607 for (
auto &
i : view) {
2608 vec_r.push_back(
i->getRow());
2609 vec_c.push_back(
i->getCol());
2611 return std::make_pair(vec_r, vec_c);
2614 auto [vec_row, vec_col] = get_vec(view);
2616 AOPetscToApplication(ao_block_row, vec_row.size(), &*vec_row.begin()),
2619 AOPetscToApplication(ao_block_col, vec_col.size(), &*vec_col.begin()),
2622 for (
auto s1 = 0; s1 != range.size() - 1; ++s1) {
2623 auto lo = range[s1];
2624 auto hi = range[s1 + 1];
2625 for (; lo != hi; ++lo) {
2626 auto diag_index_ptr = view[lo];
2627 auto row = vec_row[lo];
2628 auto col = vec_col[lo];
2629 auto nb_rows = diag_index_ptr->getNbRows();
2630 auto nb_cols = diag_index_ptr->getNbCols();
2631 auto it = block_mat_data_ptr->blockIndex.get<1>().find(
2632 boost::make_tuple(row, col, nb_rows, nb_cols));
2633 if (it == block_mat_data_ptr->blockIndex.get<1>().end()) {
2637 if (it->getInvShift() == -1) {
2638 it->getInvShift() = inv_mem_size;
2639 inv_mem_size += nb_rows * nb_cols;
2641 diag_index_ptr->getInvShift() = it->getInvShift();
2645 return inv_mem_size;
2649 auto allocate_inv_block = [&](
auto inv_mem_size) {
2651 boost::make_shared<std::vector<double>>(inv_mem_size, 0);
2652 data_ptrs[3]->dataInvBlocksPtr = inv_data_ptr;
2653 block_mat_data_ptr->dataInvBlocksPtr = data_ptrs[3]->dataInvBlocksPtr;
2656 int inv_mem_size = 0;
2658 set_inv_mat(inv_mem_size, index_view.lowView, index_view.diagLoRange);
2660 set_inv_mat(inv_mem_size, index_view.upView, index_view.diagUpRange);
2661 allocate_inv_block(inv_mem_size);
2663 if (add_preconditioner_block) {
2664 auto preconditioned_block = boost::make_shared<std::vector<double>>(
2665 data_ptrs[3]->dataInvBlocksPtr->size(), 0);
2666 data_ptrs[3]->preconditionerBlocksPtr = preconditioned_block;
2667 data_ptrs[3]->multiplyByPreconditioner =
true;
2668 block_mat_data_ptr->preconditionerBlocksPtr =
2669 data_ptrs[3]->preconditionerBlocksPtr;
2670 block_mat_data_ptr->multiplyByPreconditioner =
false;
2676 CHKERR set_up_a00_data(data_ptrs[3]);
2679 CHKERR PetscObjectGetComm((PetscObject)schur_dm, &comm);
2681 auto create_shell_mat = [&](
auto nb_r_loc,
auto nb_c_loc,
auto nb_r_glob,
2682 auto nb_c_glob,
auto data_ptr) {
2684 CHKERR MatCreateShell(comm, nb_r_loc, nb_c_loc, nb_r_glob, nb_c_glob,
2685 (
void *)data_ptr.get(), &mat_raw);
2690 auto schur_nb_global = schur_prb->getNbDofsRow();
2691 auto block_nb_global = block_prb->getNbDofsRow();
2692 auto schur_nb_local = schur_prb->getNbLocalDofsRow();
2693 auto block_nb_local = block_prb->getNbLocalDofsRow();
2695 std::array<SmartPetscObj<Mat>, 4> mats_array;
2697 create_shell_mat(schur_nb_local, schur_nb_local, schur_nb_global,
2698 schur_nb_global, data_ptrs[0]);
2700 create_shell_mat(schur_nb_local, block_nb_local, schur_nb_global,
2701 block_nb_global, data_ptrs[1]);
2703 create_shell_mat(block_nb_local, schur_nb_local, block_nb_global,
2704 schur_nb_global, data_ptrs[2]);
2706 create_shell_mat(block_nb_local, block_nb_local, block_nb_global,
2707 block_nb_global, data_ptrs[3]);
2710 <<
"(0, 0) " << schur_nb_local <<
" " << schur_nb_global <<
" "
2711 << data_ptrs[0]->blockIndex.size();
2713 <<
"(0, 1) " << schur_nb_local <<
" " << block_nb_local <<
" "
2714 << schur_nb_global <<
" " << block_nb_global <<
" "
2715 << data_ptrs[1]->blockIndex.size();
2717 <<
"(1, 0) " << block_nb_local <<
" " << schur_nb_local <<
" "
2718 << block_nb_global <<
" " << schur_nb_global <<
" "
2719 << data_ptrs[2]->blockIndex.size();
2721 <<
"(1, 1) " << block_nb_local <<
" " << block_nb_global <<
" "
2722 << data_ptrs[3]->blockIndex.size();
2726 auto schur_is = schur_prb->getSubData()->getSmartRowIs();
2727 auto block_is = block_prb->getSubData()->getSmartRowIs();
2729 return boost::make_shared<NestSchurData>(
2731 mats_array, data_ptrs, block_mat_data_ptr,
2732 std::make_pair(schur_is, block_is)
2737 std::pair<SmartPetscObj<Mat>, boost::shared_ptr<NestSchurData>>
2740 if (!schur_net_data_ptr)
2743 auto [mat_arrays, data_ptrs, block_mat_data_ptr, iss] = *schur_net_data_ptr;
2744 auto [schur_is, block_is] = iss;
2746 std::array<IS, 2> is_a = {schur_is, block_is};
2747 std::array<Mat, 4> mats_a = {
2749 mat_arrays[0], mat_arrays[1], mat_arrays[2], mat_arrays[3]
2754 CHKERR PetscObjectGetComm((PetscObject)mat_arrays[0], &comm);
2759 comm, 2, is_a.data(), 2, is_a.data(), mats_a.data(), &mat_raw
2770 OpSchurAssembleBase *
2772 std::vector<boost::shared_ptr<Range>> field_ents,
2775 std::vector<bool> sym_schur,
bool symm_op,
2776 boost::shared_ptr<BlockStructure> diag_blocks) {
2779 sequence_of_aos, sequence_of_mats,
2780 sym_schur, symm_op, diag_blocks);
2783 sequence_of_aos, sequence_of_mats,
2784 sym_schur, symm_op, diag_blocks);
2787 OpSchurAssembleBase *
2789 std::vector<boost::shared_ptr<Range>> field_ents,
2792 std::vector<bool> sym_schur,
2793 std::vector<double> diag_eps,
bool symm_op,
2794 boost::shared_ptr<BlockStructure> diag_blocks) {
2797 fields_name, field_ents, sequence_of_aos, sequence_of_mats, sym_schur,
2798 diag_eps, symm_op, diag_blocks);
2801 fields_name, field_ents, sequence_of_aos, sequence_of_mats, sym_schur,
2802 diag_eps, symm_op, diag_blocks);
2808 auto apply = [](PC pc,
Vec f,
Vec x) {
2816 CHKERR PCSetType(pc, PCSHELL);
2817 CHKERR PCShellSetApply(pc, apply);
2818 CHKERR PCShellSetName(pc,
"MoFEMSchurBlockPC");
2841 return MatSetValues<AssemblyTypeSelector<PETSC>>(
M, row_data, col_data, mat,
2846 SchurBackendMatSetValuesPtr::MatSetValuesPtr
2856 CHKERR SchurBackendMatSetValuesPtr::matSetValuesPtr(
M, row_data, col_data,
2858 CHKERR assembleStorage(row_data, col_data, mat, iora);
2890 SchurBackendMatSetValuesPtr::matSetValuesBlockPtr =
2899 CHKERR SchurBackendMatSetValuesPtr::matSetValuesBlockPtr(
M, row_data,
2900 col_data, mat, iora);
2901 CHKERR assembleStorage(row_data, col_data, mat, iora);
2923 SchurBackendMatSetValuesPtr::matSetValuesPreconditionedBlockPtr =
2931 CHKERR SchurBackendMatSetValuesPtr::matSetValuesPreconditionedBlockPtr(
2932 M, row_data, col_data, mat, iora);
2933 CHKERR assembleStorage(row_data, col_data, mat, iora);
2949 if (block_mat_data->multiplyByPreconditioner) {
2950 block_mat_data->multiplyByPreconditioner =
false;
2952 if (!block_mat_data->preconditionerBlocksPtr) {
2954 "preconditionerBlocksPtr not set");
2956 block_mat_data->multiplyByPreconditioner =
true;
2963 std::string filename) {
2969 ReadUtilIface *iface;
2970 CHKERR moab.query_interface(iface);
2972 auto size = block_mat_data->blockIndex.size();
2975 vector<double *> arrays_coord;
2976 CHKERR iface->get_node_coords(3, 4 * size, 0, starting_vertex, arrays_coord);
2979 CHKERR iface->get_element_connect(size, 4, MBQUAD, 0, starting_handle, array);
2981 double def_val_nrm2 = 0;
2983 CHKERR moab.tag_get_handle(
"nrm2", 1, MB_TYPE_DOUBLE, th_nrm2,
2984 MB_TAG_CREAT | MB_TAG_DENSE, &def_val_nrm2);
2986 int def_val_mat_shift = 0;
2988 CHKERR moab.tag_get_handle(
"mat_shift", 1, MB_TYPE_INTEGER, th_mat_shift,
2989 MB_TAG_CREAT | MB_TAG_DENSE, &def_val_mat_shift);
2992 for (
auto &
d : block_mat_data->blockIndex) {
2993 auto row =
d.getRow();
2994 auto col =
d.getCol();
2995 auto nb_rows =
d.getNbRows();
2996 auto nb_cols =
d.getNbCols();
2997 auto mat_shift =
d.getMatShift();
3000 arrays_coord[0][4 *
i + 0] = row;
3001 arrays_coord[1][4 *
i + 0] = col;
3002 arrays_coord[2][4 *
i + 0] = 0;
3005 arrays_coord[0][4 *
i + 1] = row + nb_rows;
3006 arrays_coord[1][4 *
i + 1] = col;
3007 arrays_coord[2][4 *
i + 1] = 0;
3010 arrays_coord[0][4 *
i + 2] = row + nb_rows;
3011 arrays_coord[1][4 *
i + 2] = col + nb_cols;
3012 arrays_coord[2][4 *
i + 2] = 0;
3015 arrays_coord[0][4 *
i + 3] = row;
3016 arrays_coord[1][4 *
i + 3] = col + nb_cols;
3017 arrays_coord[2][4 *
i + 3] = 0;
3020 array[4 *
i + 0] = starting_vertex + 4 *
i + 0;
3021 array[4 *
i + 1] = starting_vertex + 4 *
i + 1;
3022 array[4 *
i + 2] = starting_vertex + 4 *
i + 2;
3023 array[4 *
i + 3] = starting_vertex + 4 *
i + 3;
3025 auto prt = &(*block_mat_data->dataBlocksPtr)[
d.getMatShift()];
3026 auto nrm2 = cblas_dnrm2(nb_rows * nb_cols, prt, 1);
3028 CHKERR moab.tag_set_data(th_nrm2, &ele, 1, &nrm2);
3029 CHKERR moab.tag_set_data(th_mat_shift, &ele, 1, &mat_shift);
3034 CHKERR iface->update_adjacencies(starting_handle, size, 4, array);
3035 CHKERR moab.write_file(filename.c_str(),
"VTK",
"");