WIP. pathological case is known

This commit is contained in:
Miguel M 2023-06-05 20:57:26 +01:00
parent 97ab74bbd0
commit ef6f80aaaf
4 changed files with 525 additions and 207 deletions

View File

@ -19,7 +19,7 @@ typedef struct {
void DebugPrintMatrix(matrix_t *matrix);
// Prints a row of a `matrix_t` to standard output.
void PrintMatrixRow(matrix_t *matrix, size_t row);
void PrintMatrixRow(const matrix_t *matrix, const size_t row);
// Parse a matrix from the standard input.
//

View File

@ -7,7 +7,7 @@ from sane import *
COMPILER = "gcc"
COMPILE_FLAGS = [
"-std=c11",
"-std=gnu11",
"-g3",
"-O0",
"-Wall",
@ -16,9 +16,10 @@ COMPILE_FLAGS = [
"-Wpedantic",
"-pedantic-errors",
"-fopenmp",
"-pthread",
]
LINK_FLAGS = ["-fopenmp"]
LIBRARIES = ["-lm"]
LINK_FLAGS = ["-fopenmp", "-pthread"]
LIBRARIES = ["-lm", "-lpthread"]
OBJ_DIR = os.path.join("obj", "release" if ("RELEASE" in os.environ) else "debug")
SRC_DIR = "src"
EXE_NAME = "main." + ("release" if ("RELEASE" in os.environ) else "debug") + ".exe"
@ -26,7 +27,7 @@ EXE_NAME = "main." + ("release" if ("RELEASE" in os.environ) else "debug") + ".e
ROOT = os.path.dirname(os.path.realpath(__file__))
if "RELEASE" in os.environ:
COMPILE_FLAGS[COMPILE_FLAGS.index("-O0")] = "-O2"
COMPILE_FLAGS[COMPILE_FLAGS.index("-O0")] = "-Ofast"
COMPILE_FLAGS[COMPILE_FLAGS.index("-g3")] = "-g"
COMPILE_FLAGS.append("-DRELEASE")
LINK_FLAGS.append("-DRELEASE")

View File

@ -1,5 +1,7 @@
#include <alloca.h>
#include <math.h>
#include <omp.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@ -7,71 +9,15 @@
#include <time.h>
#include <unistd.h>
// #include "../contrib/orlp/ipow.h"
#define STB_DS_IMPLEMENTATION
#define STBDS_NO_SHORT_NAMES
#include "../contrib/orlp/ipow.h"
#include "../contrib/stb/stb_ds.h"
#include "../includes/cg.h"
#include "../includes/matrix.h"
#include "../includes/permutation.h"
static uintmax_t _timing_last_count = 0;
static clock_t _timing_last_time = 0.;
static float _timing_rate = 0.;
static float _timing_exp_backoff = 1.;
static void ReportProgress(uintmax_t current, uintmax_t total, clock_t now) {
_Bool updated = 0;
#pragma omp critical
{
if (current >= _timing_last_count) {
float inferred_rate = (float)(current - _timing_last_count) /
((float)(now - _timing_last_time) / CLOCKS_PER_SEC);
if (inferred_rate < _timing_rate) {
_timing_rate = inferred_rate;
_timing_exp_backoff = 0.3;
} else {
_timing_rate = _timing_rate * (1. - _timing_exp_backoff) +
inferred_rate * _timing_exp_backoff;
_timing_exp_backoff = fmax(0.99 * _timing_exp_backoff, 0.001);
}
_timing_last_count = current;
_timing_last_time = now;
updated = 1;
}
}
if (updated) {
uintmax_t time_left = (total - current) / _timing_rate;
uintmax_t days = time_left / 60 / 60 / 24;
if (days > 0) {
fprintf(stderr,
"\33[2K\r%zu/%zu (%" PRIuMAX " iter/sec | About %" PRIuMAX
" days left.)",
current, total, (uintmax_t)_timing_rate, days);
} else {
uint8_t hours = (time_left % (60 * 60 * 24)) / 60;
uint8_t minutes = (time_left % (60 * 60)) / 60;
uint8_t seconds = time_left % 60;
fprintf(stderr,
"\33[2K\r%zu/%zu (%" PRIuMAX " iter/sec | About %" PRIu8
"h%" PRIu8 "m%" PRIu8 "s left.)",
current, total, (uintmax_t)_timing_rate, hours, minutes, seconds);
}
fflush(stderr);
}
}
static uintmax_t Factorial(uint8_t value) {
uintmax_t acc = 1;
for (uint8_t i = 2; i <= value; i++) {
acc *= (uintmax_t)value;
}
return acc;
}
static inline _Bool IsOdd(size_t value) { return (_Bool)(value & 1); }
static _Bool ReadArguments(char *from, size_t *into, const char *varname) {
if (!sscanf(from, "%zu", into)) {
@ -112,6 +58,7 @@ typedef struct {
size_t b_out;
size_t a_in;
size_t b_in;
size_t bin_size;
arg_format_t formatting;
} args_t;
@ -129,17 +76,18 @@ static args_t ParseArgs(size_t argc, char *argv[]) {
}
}
size_t a_out, b_out, a_in, b_in;
size_t a_out, b_out, a_in, b_in, bin_size;
arg_format_t formatting;
if (stbds_arrlen(positionals) != 4) {
if (stbds_arrlen(positionals) != 5) {
goto arg_error;
}
if (!ReadArguments(positionals[0], &a_out, "A outputs") ||
!ReadArguments(positionals[1], &b_out, "B outputs") ||
!ReadArguments(positionals[2], &a_in, "A inputs") ||
!ReadArguments(positionals[3], &b_in, "B inputs")) {
!ReadArguments(positionals[3], &b_in, "B inputs") ||
!ReadArguments(positionals[4], &bin_size, "bin size")) {
goto arg_error;
}
@ -160,6 +108,7 @@ static args_t ParseArgs(size_t argc, char *argv[]) {
.a_out = a_out,
.b_in = b_in,
.b_out = b_out,
.bin_size = bin_size,
.formatting = formatting,
};
@ -176,20 +125,299 @@ arg_error:
if (argc > 0) {
fprintf(stderr, "%s", argv[0]);
} else {
fprintf(stderr, "./exe");
fprintf(stderr, "./main.exe");
}
fprintf(stderr,
" [--cg|--p|--p-raw] <A outputs> <B outputs> <A inputs> <B inputs>");
" [--cg|--p|--p-raw] <A outputs> <B outputs> <A inputs> <B inputs> "
"<bin size>");
exit(EXIT_FAILURE);
}
// Calculates inequivalent rows out of a given block of the rows. A block is a
// contiguous subset of rows, with indices
// `block_start..(block_start+row_count)`.
//
// Arguments:
// a_in Number of inputs for A
// b_in Number of inputs for B
// a_out Number of outputs for A
// a_out Number of outputs for B
// matrix The matrix whose rows are to be examined
// row_len Length of a matrix row
// seen Boolean buffer of size at least `row_count`
// p_buf data_t buffer capable of holding a P representation row
// cg_buf data_t buffer capable of holding a CG representation row
// a_in_perm permutation_generator_t for permutations of A's input
// b_in_perm permutation_generator_t for permutations of B's input
// a_out_perms permutation_generator_t* array for permutations of A's output
// b_out_perms permutation_generator_t* array for permutations of B's output
// block_start Where to start counting the indices from.
// row_count Length of block
//
// After the function call, `seen[0..row_count]` holds whether each row is
// redundant to a previous row or not.
//
// `seen`, `p_buf`, and `cg_buf` WILL be clobbered. They don't need to be
// initialized, only allocated. It's the caller's responsibility to free these
// buffers if applicable.
//
// The permutation generators are expected to be correctly initialized; this
// function only resets the permutation generators, it does NOT allocate them
// or free them.
static void UniqueInBlock(const size_t a_in, const size_t b_in,
const size_t a_out, const size_t b_out,
const matrix_t *matrix, size_t row_len, _Bool *seen,
data_t *p_buf, data_t *cg_buf,
permutation_generator_t *a_in_perm,
permutation_generator_t *b_in_perm,
permutation_generator_t *a_out_perms,
permutation_generator_t *b_out_perms,
const size_t block_start, const size_t row_count) {
for (size_t i = 0; i < row_count; i++) {
seen[i] = 0;
}
for (size_t lhs_i = 0; lhs_i < row_count - 1; lhs_i++) {
if (seen[lhs_i]) {
continue;
}
data_t *lhs = matrix->head + (block_start + lhs_i) * matrix->row_len;
for (size_t rhs_i = lhs_i + 1; rhs_i < row_count; rhs_i++) {
if (seen[rhs_i]) {
continue;
}
data_t *rhs = matrix->head + (block_start + rhs_i) * matrix->row_len;
PermutationReset(a_in_perm);
PermutationReset(b_in_perm);
for (size_t i = 0; i < a_in; i++) {
PermutationReset(a_out_perms + i);
}
for (size_t i = 0; i < b_in; i++) {
PermutationReset(b_out_perms + i);
}
while (!a_in_perm->exhausted) {
PermutationReset(b_in_perm);
while (!b_in_perm->exhausted) {
ResetConditionalPermutations(a_out_perms, a_out);
while (!a_out_perms[a_out - 1].exhausted) {
ResetConditionalPermutations(b_out_perms, b_out);
while (!b_out_perms[b_out - 1].exhausted) {
// Compare the two rows
// TODO: Figure out why both A==B and B==A need to be checked.
// Of note is that the pathological case I found occurs under a
// party swap.
// See also: the same problem in UniqueInSubsetPair
FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
a_in_perm->permutation, b_in_perm->permutation,
a_out_perms, b_out_perms);
{
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
// If the number of output labels are the same, and the number
// of input labels is also the same, we can also check for
// equality under party swapping. I don't expect this
// conditional to be very penalizing because it's very
// predictable.
if (a_in == b_in && a_out == b_out) {
// Use the results in p_buf
PSwapParties(a_out, a_in, p_buf);
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
AdvanceConditionalPermutations(b_out_perms, b_out);
}
AdvanceConditionalPermutations(a_out_perms, a_out);
}
PermutationNext(b_in_perm);
}
PermutationNext(a_in_perm);
}
skip_permutations:;
} // For loop over rhs_i
} // For loop over lhs_i
}
// Calculates inequivalent rows out of a given subset of the rows. The subset
// is identified by a pair of arrays of indices.
//
// Arguments:
// a_in Number of inputs for A
// b_in Number of inputs for B
// a_out Number of outputs for A
// a_out Number of outputs for B
// matrix The matrix whose rows are to be examined
// row_len Length of a matrix row
// seen Boolean buffer of size at least `second_row_count`
// p_buf data_t buffer capable of holding a P representation row
// cg_buf data_t buffer capable of holding a CG representation row
// a_in_perm permutation_generator_t for permutations of A's input
// b_in_perm permutation_generator_t for permutations of B's input
// a_out_perms permutation_generator_t* array for permutations of A's
// output
// b_out_perms permutation_generator_t* array for permutations of B's
// output
// first_row_idxs First array of indices of rows to consider
// first_row_count Length of first_row_idxs[]
// second_row_idxs Second array of indices of rows to consider
// second_row_count Length of second_row_idxs[]
//
//
// After the function call, `seen[..]` holds whether each row in the second
// block is redundant to a row in the first block.
//
// `seen`, `p_buf`, and `cg_buf` WILL be clobbered. They don't need to be
// initialized, only allocated. It's the caller's responsibility to free these
// buffers if applicable.
//
// The permutation generators are expected to be correctly initialized; this
// function only resets the permutation generators, it does NOT allocate them
// or free them.
static void UniqueInSubsetPair(
const size_t a_in, const size_t b_in, const size_t a_out,
const size_t b_out, const matrix_t *matrix, const size_t row_len,
_Bool *seen, data_t *p_buf, data_t *cg_buf,
permutation_generator_t *a_in_perm, permutation_generator_t *b_in_perm,
permutation_generator_t *a_out_perms, permutation_generator_t *b_out_perms,
const size_t *first_row_idxs, const size_t first_row_count,
const size_t *second_row_idxs, const size_t second_row_count) {
for (size_t i = 0; i < second_row_count; i++) {
seen[i] = 0;
}
for (size_t lhs_i = 0; lhs_i < first_row_count; lhs_i++) {
data_t *lhs = matrix->head + first_row_idxs[lhs_i] * matrix->row_len;
for (size_t rhs_i = 0; rhs_i < second_row_count; rhs_i++) {
if (seen[rhs_i]) {
continue;
}
data_t *rhs = matrix->head + second_row_idxs[rhs_i] * matrix->row_len;
// This may be unnecessary the first time around, but after
// goto skip_permutations, the generators are left in an undefined state.
PermutationReset(a_in_perm);
PermutationReset(b_in_perm);
for (size_t i = 0; i < a_in; i++) {
PermutationReset(a_out_perms + i);
}
for (size_t i = 0; i < b_in; i++) {
PermutationReset(b_out_perms + i);
}
while (!a_in_perm->exhausted) {
PermutationReset(b_in_perm);
while (!b_in_perm->exhausted) {
ResetConditionalPermutations(a_out_perms, a_out);
while (!a_out_perms[a_out - 1].exhausted) {
ResetConditionalPermutations(b_out_perms, b_out);
while (!b_out_perms[b_out - 1].exhausted) {
// TODO: Figure out why both A==B and B==A need to be checked.
// Of note is that the pathological case I found occurs under a
// party swap.
// Compare the two rows
FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
a_in_perm->permutation, b_in_perm->permutation,
a_out_perms, b_out_perms);
{
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
// If the number of output labels are the same, and the number
// of input labels is also the same, we can also check for
// equality under party swapping. I don't expect this
// conditional to be very penalizing because it's very
// predictable.
if (a_in == b_in && a_out == b_out) {
// Use the results in p_buf
PSwapParties(a_out, a_in, p_buf);
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
AdvanceConditionalPermutations(b_out_perms, b_out);
}
AdvanceConditionalPermutations(a_out_perms, a_out);
}
PermutationNext(b_in_perm);
}
PermutationNext(a_in_perm);
}
skip_permutations:;
} // For loop over rhs_i
} // For loop over lhs_i
}
int main(int argc, char *argv[]) {
args_t args = ParseArgs(argc, argv);
size_t a_out, a_in, b_out, b_in;
a_out = args.a_out;
b_out = args.b_out;
a_in = args.a_in;
b_in = args.b_in;
const args_t args = ParseArgs(argc, argv);
const size_t a_out = args.a_out;
const size_t b_out = args.b_out;
const size_t a_in = args.a_in;
const size_t b_in = args.b_in;
const size_t bin_size = args.bin_size;
if (a_out < 2 || b_out < 2 || a_in < 2 || b_in < 2) {
fprintf(stderr,
@ -198,29 +426,48 @@ int main(int argc, char *argv[]) {
exit(EXIT_FAILURE);
}
size_t row_len = (a_out - 1) * (b_out - 1) * a_in * b_in +
(a_out - 1) * a_in + (b_out - 1) * b_in + 1;
const size_t row_len = (a_out - 1) * (b_out - 1) * a_in * b_in +
(a_out - 1) * a_in + (b_out - 1) * b_in + 1;
matrix_t matrix = ParseMatrix(row_len);
size_t row_count = matrix.len / matrix.row_len;
const size_t row_count = matrix.len / matrix.row_len;
_Bool *seen = calloc(row_count, sizeof(_Bool));
if (seen == NULL) {
fprintf(stderr, "Failed to allocate equivalence flag string. Aborting.");
if (bin_size > row_count) {
fprintf(stderr,
"Bin size cannot be bigger than the number of rows in the input.");
exit(EXIT_FAILURE);
}
// Start testing for equivalences.
const uintmax_t perm_iterations =
Factorial(a_in) * Factorial(b_in) *
(uintmax_t)upow((uint64_t)(Factorial(a_in)), (uint8_t)a_out) *
(uintmax_t)upow((uint64_t)(Factorial(b_in)), (uint8_t)b_out);
const uintmax_t total =
(uintmax_t)row_count * (uintmax_t)row_count * perm_iterations;
uintmax_t tested = 0;
// BEGINNING OF COMPARISON ALGORITHM
const size_t bin_count =
((row_count % bin_size == 0) ? row_count / bin_size
: row_count / bin_size + 1);
size_t *unique = NULL;
size_t *ends =
NULL; // Bin i contains elements with indices ends[i-1]..ends[i]
size_t *unique_swap = NULL;
size_t *ends_swap = NULL;
// TODO: Optimization: only one lock is needed
pthread_rwlockattr_t attr;
pthread_rwlockattr_init(&attr);
pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
pthread_rwlock_t vector_lock, vector_swap_lock;
pthread_rwlock_init(&vector_lock, &attr);
pthread_rwlock_init(&vector_swap_lock, &attr);
#pragma omp parallel default(none) \
shared(stderr, args, a_out, b_out, a_in, b_in, row_len, matrix, row_count, \
seen, perm_iterations, total, tested)
bin_size, bin_count, unique, ends, unique_swap, ends_swap, \
vector_lock, vector_swap_lock)
{
_Bool *seen = malloc(bin_size * sizeof(_Bool));
if (seen == NULL) {
fprintf(stderr, "Failed to allocate equivalence flag string. Aborting.");
exit(EXIT_FAILURE);
}
data_t *p_buf = malloc(a_out * b_out * a_in * b_in * sizeof(data_t));
data_t *cg_buf = malloc((((a_out - 1) * (b_out - 1) * a_in * b_in +
(a_out - 1) * a_in + (b_out - 1) * b_in) +
@ -260,113 +507,179 @@ int main(int argc, char *argv[]) {
PermutationReset(b_out_perms + i);
}
#pragma omp for schedule(dynamic)
for (size_t lhs_i = 0; lhs_i < row_count - 1; lhs_i++) {
// Base round
#pragma omp for
for (size_t bin_i = 0; bin_i < bin_count; bin_i++) {
const size_t block_i_start = bin_i * bin_size;
size_t bin_i_size = row_count - block_i_start;
if (bin_i_size > bin_size) {
bin_i_size = bin_size;
}
UniqueInBlock(a_in, b_in, a_out, b_out, &matrix, row_len, seen, p_buf,
cg_buf, &a_in_perm, &b_in_perm, a_out_perms, b_out_perms,
block_i_start, bin_i_size);
pthread_rwlock_wrlock(&vector_lock);
{
clock_t now = clock();
uintmax_t tested_now = tested;
ReportProgress(tested_now, total, now);
size_t unique_count = 0;
for (size_t i = 0; i < bin_i_size; i++) {
if (!seen[i]) {
stbds_arrput(unique, block_i_start + i);
unique_count++;
}
}
if (stbds_arrlenu(ends) == 0) {
stbds_arrput(ends, unique_count);
} else {
size_t last_end = stbds_arrlast(ends);
stbds_arrput(ends, last_end + unique_count);
}
}
pthread_rwlock_unlock(&vector_lock);
if (seen[lhs_i]) {
const uintmax_t remaining = (uintmax_t)row_count * perm_iterations;
#pragma omp atomic update
tested += remaining;
continue;
}
} // End of omp for
// Implicit barrier
data_t *lhs = matrix.head + lhs_i * matrix.row_len;
for (size_t rhs_i = lhs_i + 1; rhs_i < row_count; rhs_i++) {
if (seen[lhs_i]) {
const uintmax_t remaining =
(uintmax_t)(row_count - lhs_i) * perm_iterations;
#pragma omp atomic update
tested += remaining;
break;
#pragma omp single
{
printf("Base round results:\n");
size_t count = stbds_arrlen(ends);
for (size_t i = 0; i < count; i++) {
size_t *block;
size_t len;
if (i == 0) {
len = ends[0];
block = unique;
} else {
len = ends[i] - ends[i - 1];
block = unique + ends[i - 1];
}
if (seen[rhs_i]) {
#pragma omp atomic update
tested += perm_iterations;
continue;
printf("%zu[", ends[i]);
for (size_t j = 0; j < len; j++) {
printf("%zu, ", block[j]);
}
printf("]\n");
}
printf("\nReduction rounds:\n");
}
data_t *rhs = matrix.head + rhs_i * matrix.row_len;
// Reductions round
PermutationReset(&a_in_perm);
while (!a_in_perm.exhausted) {
PermutationReset(&b_in_perm);
while (!b_in_perm.exhausted) {
ResetConditionalPermutations(a_out_perms, a_out);
while (!a_out_perms[a_out - 1].exhausted) {
ResetConditionalPermutations(b_out_perms, b_out);
while (!b_out_perms[b_out - 1].exhausted) {
if (seen[lhs_i]) {
goto skip_permutations;
}
size_t round_bin_count;
{
pthread_rwlock_rdlock(&vector_lock);
round_bin_count = stbds_arrlenu(ends);
pthread_rwlock_unlock(&vector_lock);
}
while (round_bin_count > 1) {
const size_t pairs = round_bin_count / 2;
// Compare the two rows
FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
a_in_perm.permutation, b_in_perm.permutation,
a_out_perms, b_out_perms);
#pragma omp for
for (size_t pair_i = 0; pair_i < pairs; pair_i++) {
size_t *first_block_idxs, *second_block_idxs;
size_t first_block_len, second_block_len;
{
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (pair_i == 0) {
first_block_idxs = unique;
first_block_len = ends[0];
} else {
first_block_idxs = unique + ends[2 * pair_i - 1];
first_block_len = ends[2 * pair_i] - ends[2 * pair_i - 1];
}
second_block_idxs = unique + ends[2 * pair_i];
second_block_len = ends[2 * pair_i + 1] - ends[2 * pair_i];
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
UniqueInSubsetPair(a_in, b_in, a_out, b_out, &matrix, row_len, seen,
p_buf, cg_buf, &a_in_perm, &b_in_perm, a_out_perms,
b_out_perms, first_block_idxs, first_block_len,
second_block_idxs, second_block_len);
// If the number of output labels are the same, and the number
// of input labels is also the same, we can also check for
// equality under party swapping. I don't expect this
// conditional to be very penalizing because it's very
// predictable.
if (a_in == b_in && a_out == b_out) {
// Use the results in p_buf
PSwapParties(a_out, a_in, p_buf);
FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
_Bool equivalent = 1;
for (size_t i = row_len; i > 0; i--) {
if (lhs[i - 1] != cg_buf[i - 1]) {
equivalent = 0;
break;
}
}
if (equivalent) {
seen[rhs_i] = 1;
goto skip_permutations;
}
}
AdvanceConditionalPermutations(b_out_perms, b_out);
}
AdvanceConditionalPermutations(a_out_perms, a_out);
pthread_rwlock_wrlock(&vector_swap_lock);
{
size_t reduction_size = first_block_len;
for (size_t i = 0; i < first_block_len; i++) {
stbds_arrput(unique_swap, first_block_idxs[i]);
}
for (size_t i = 0; i < second_block_len; i++) {
if (!seen[i]) {
stbds_arrput(unique_swap, second_block_idxs[i]);
reduction_size++;
}
}
if (stbds_arrlenu(ends_swap) == 0) {
stbds_arrput(ends_swap, reduction_size);
} else {
size_t last_end = stbds_arrlast(ends_swap);
stbds_arrput(ends_swap, last_end + reduction_size);
}
}
pthread_rwlock_unlock(&vector_swap_lock);
PermutationNext(&b_in_perm);
} // End of loop over pairs
// Implicit barrier
// This barrier is important, as it ensures `unique` and `ends` to be
// read only without locking.
// For the same reason, and in conjunction over the following #single
// section, there is no need to lock anything in the following #single
// section
#pragma omp single
{
if (IsOdd(round_bin_count)) {
// Push the remaining bin into the swap variables
// Note that stbds_arrlenu(ends)) is guaranteed to be >= 2 at this
// point
size_t *odd_block_idxs = unique + ends[stbds_arrlenu(ends) - 2];
size_t odd_block_len =
stbds_arrlast(ends) - ends[stbds_arrlenu(ends) - 2];
for (size_t i = 0; i < odd_block_len; i++) {
stbds_arrput(unique_swap, odd_block_idxs[i]);
}
size_t last_end = stbds_arrlast(ends_swap);
stbds_arrput(ends_swap, last_end + odd_block_len);
}
// ---
size_t count = stbds_arrlen(ends_swap);
for (size_t i = 0; i < count; i++) {
size_t *block;
size_t len;
if (i == 0) {
len = ends_swap[0];
block = unique_swap;
} else {
len = ends_swap[i] - ends_swap[i - 1];
block = unique_swap + ends_swap[i - 1];
}
PermutationNext(&a_in_perm);
printf("%zu[", ends_swap[i]);
for (size_t j = 0; j < len; j++) {
printf("%zu, ", block[j]);
}
printf("] ");
}
printf("\n");
// ---
skip_permutations:;
#pragma omp atomic update
tested += perm_iterations;
} // For loop over rhs_i
} // For loop over lhs_i
stbds_arrfree(unique);
stbds_arrfree(ends);
unique = unique_swap;
ends = ends_swap;
unique_swap = NULL;
ends_swap = NULL;
} // End of single section
// Implicit barrier
pthread_rwlock_rdlock(&vector_lock);
{ round_bin_count = stbds_arrlenu(ends); }
pthread_rwlock_unlock(&vector_lock);
} // End of pairs reduction loop
// Program is done!
// `unique` should hold the indices of the unique rows.
#pragma omp single
{
@ -382,34 +695,34 @@ int main(int argc, char *argv[]) {
}
// Print every unique row
for (size_t i = 0; i < row_count; i++) {
if (!seen[i]) {
switch (args.formatting) {
default:
case kRowFmt: {
PrintMatrixRow(&matrix, i);
} break;
case kCgFmt: {
printf("%zu: ", i);
PrintCg(a_out, b_out, a_in, b_in,
matrix.head + i * matrix.row_len);
} break;
case kPFmt: {
printf("%zu: ", i);
FromCgToP(a_out, b_out, a_in, b_in,
matrix.head + i * matrix.row_len, p_buf,
a_in_perm.permutation, b_in_perm.permutation,
a_out_perms, b_out_perms);
PrintP(a_out, b_out, a_in, b_in, p_buf);
} break;
case kPRawFmt: {
FromCgToP(a_out, b_out, a_in, b_in,
matrix.head + i * matrix.row_len, p_buf,
a_in_perm.permutation, b_in_perm.permutation,
a_out_perms, b_out_perms);
PrintPRaw(a_out, b_out, a_in, b_in, p_buf);
} break;
}
size_t unique_count = stbds_arrlenu(unique);
for (size_t i = 0; i < unique_count; i++) {
size_t row_idx = unique[i];
switch (args.formatting) {
default:
case kRowFmt: {
PrintMatrixRow(&matrix, row_idx);
} break;
case kCgFmt: {
printf("%zu: ", i);
PrintCg(a_out, b_out, a_in, b_in,
matrix.head + row_idx * matrix.row_len);
} break;
case kPFmt: {
printf("%zu: ", i);
FromCgToP(a_out, b_out, a_in, b_in,
matrix.head + row_idx * matrix.row_len, p_buf,
a_in_perm.permutation, b_in_perm.permutation, a_out_perms,
b_out_perms);
PrintP(a_out, b_out, a_in, b_in, p_buf);
} break;
case kPRawFmt: {
FromCgToP(a_out, b_out, a_in, b_in,
matrix.head + row_idx * matrix.row_len, p_buf,
a_in_perm.permutation, b_in_perm.permutation, a_out_perms,
b_out_perms);
PrintPRaw(a_out, b_out, a_in, b_in, p_buf);
} break;
}
}
} // End of omp single region
@ -426,10 +739,14 @@ int main(int argc, char *argv[]) {
free(b_out_perms);
free(cg_buf);
free(p_buf);
free(seen);
} // End of omp parallel region
// Free all the memory so the sanitizer is happy.
free(seen);
pthread_rwlock_destroy(&vector_lock);
pthread_rwlock_destroy(&vector_swap_lock);
stbds_arrfree(unique);
stbds_arrfree(ends);
MatrixFree(&matrix);
return EXIT_SUCCESS;
}

View File

@ -21,7 +21,7 @@ void DebugPrintMatrix(matrix_t *matrix) {
}
// Prints a row of a `matrix_t` to standard output.
void PrintMatrixRow(matrix_t *matrix, size_t row) {
void PrintMatrixRow(const matrix_t *matrix, const size_t row) {
for (size_t entry = 0; entry < matrix->row_len; entry++) {
printf("%" PRImDATA, *(matrix->head + row * (matrix->row_len) + entry));
if (entry != matrix->row_len - 1) {