Aux script to split input data files into chunks

(Hopeful) improvements by using transitivity of equivalence
2023-06-06 16:04:30 +01:00 · 2023-06-06 16:03:09 +01:00
5 changed files with 360 additions and 237 deletions
--- a/59
+++ b/59
@ -36,4 +36,61 @@ To use, pipe the input (structured according to the source specification) into
 the executable, while specifying the number of outputs and inputs, in that
 order. E.g.,

-    ./main.exe 2 2 2 2 < data/2222_inq.txt
+    ./main.exe 2 2 2 2 < data/2222_inq.txt
+    
+
+[[Notes on the Approach Taken]]
+
+Originally, every pair of rows was compared directly. This was too slow to
+tackle even, for example, [data/3332_inq.txt]. (Cf. [data/README].) So,
+instead, I'm now trying a divide-and-conquer approach, explained below.
+
+Consider the input to be N rows. From the ground truths, we know that the
+number of non-equivalent rows, which we will denote k, is such that k << N.
+So split the input into blocks of L rows each (k << L < N). Without loss of
+generality, take N/L to be an integer. Within each block, at most k rows are
+non-equivalent. We perform a "reduction" within each block, and then
+comparisons across blocks: if there are P permutations to be considered (see
+[doc/proposal.pdf] for details) this means, in the worst-case scenario, fewer
+than
+
+    (N/L)L²P + (N/2L)k²P + (N/4L)k²P + ... + (N/L)/2^log₂(N/L) k²P
+  = (N/L)L²P + (N/L)k²P(1 - L/N)
+  = NP[L + k²/L (1 - L/N)]                                              (1)
+
+operations. Compare this with the "straightforward" mode of operation, where
+
+    N²P                                                                 (2)
+
+operations are required. Clearly, the divide-and-conquer approach has an
+advantage as long as the term in square brackets in (1) is significantly smaller
+than N.
+
+In pseudo-code:
+
+    block_size is provided
+    unique := []
+    sizes := []
+
+    base round:
+        # distribute loop
+        for block of rows of size block_size:
+            local_unique := unique rows in local block
+            push local_unique to unique
+            push len(local_unique) to sizes
+
+    reductions:
+        while len(sizes) > 1:
+            unique' := []
+            sizes' := []
+
+            # distribute loop
+            for pair of blocks in (unique, sizes): # An odd block out is skipped by this loop
+                local := concatenation of pair of blocks
+                local_unique := unique rows in local
+                push local_unique to unique'
+                push len(local_unique) to sizes'
+
+            unique := unique'
+            sizes := sizes'
+            
+    done
--- a/aux/split_data.py
+++ b/aux/split_data.py
@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""Splits an input file into random chunks.
+
+Usage:
+    split_data <input file> <chunks>
+
+See [root]/data/splits/README for a rationale.
+"""
+
+import os
+import argparse
+
+
+CHUNK_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/splits"))
+
+
+def get_args():
+    """Parse the command line and return the arguments as a plain ``dict``.
+
+    The module docstring doubles as the help text: its first line becomes the
+    description and the remaining lines become the epilog.
+
+    Returns:
+        dict: ``{"<input file>": str, "<chunks>": int}``. ``<chunks>`` is
+        guaranteed to be at least 1; anything else is rejected by argparse
+        with a usage error.
+    """
+
+    def positive_int(text):
+        # Reject 0 and negative counts up front, so the caller never divides
+        # by (or iterates over) a non-positive number of chunks.
+        value = int(text)
+        if value < 1:
+            raise argparse.ArgumentTypeError(
+                f"expected a positive integer, got {text!r}"
+            )
+        return value
+
+    doclines = __doc__.splitlines()
+    description = doclines[0]
+    epilog = "\n".join(doclines[1:])
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        epilog=epilog,
+        # Keep the line breaks of the "Usage:" section; the default formatter
+        # would re-wrap the epilog into a single paragraph.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument("<input file>", type=str, help="Input file to split.")
+    parser.add_argument(
+        "<chunks>", type=positive_int, help="Number of chunks to produce."
+    )
+    return vars(parser.parse_args())
+
+
+def chunk_fpath(source_path, chunk_i, chunk_total):
+    """Build the output path of one chunk inside ``CHUNK_DIR``.
+
+    The file name is ``<chunk_total>_<chunk_i>_<basename of source_path>``;
+    any directory component of ``source_path`` is discarded.
+    """
+    name_parts = (chunk_total, chunk_i, os.path.basename(source_path))
+    return os.path.join(CHUNK_DIR, "_".join(str(part) for part in name_parts))
+
+
+def main():
+    """Split the input file into ``<chunks>`` contiguous chunk files.
+
+    The first ``chunks - 1`` output files each receive
+    ``line_count // chunks`` consecutive lines; the last file receives every
+    remaining line, so no input line is ever dropped. Output paths come from
+    ``chunk_fpath``.
+
+    Raises:
+        SystemExit: if the number of chunks is not positive, or if the input
+            has fewer lines than the requested number of chunks. Nothing is
+            written in either case.
+    """
+    from itertools import islice
+
+    args = get_args()
+    source_path = args["<input file>"]
+    chunk_count = args["<chunks>"]
+
+    if chunk_count < 1:
+        raise SystemExit(
+            f"The number of chunks must be positive (got {chunk_count})."
+        )
+
+    with open(source_path, "r") as source_file:
+        # First pass: count the lines, so that the chunk length is known.
+        line_count = sum(1 for _line in source_file)
+
+        if line_count < chunk_count:
+            raise SystemExit(
+                f"Cannot split {line_count} line(s) into {chunk_count} chunks."
+            )
+
+        chunk_len = line_count // chunk_count
+
+        # Second pass: rewind and hand out the lines chunk by chunk.
+        source_file.seek(0)
+
+        for chunk_i in range(chunk_count):
+            outfile_name = chunk_fpath(source_path, chunk_i, chunk_count)
+            print(f'Writing {outfile_name}...')
+            os.makedirs(os.path.dirname(outfile_name), exist_ok=True)
+
+            if chunk_i < chunk_count - 1:
+                # islice pulls exactly chunk_len lines and leaves the file
+                # positioned at the start of the next chunk.
+                chunk_lines = islice(source_file, chunk_len)
+            else:
+                # The last chunk always takes whatever is left, including the
+                # remainder when line_count is not a multiple of chunk_count.
+                chunk_lines = source_file
+
+            with open(outfile_name, "w") as out_file:
+                out_file.writelines(chunk_lines)
+
+
+if __name__ == "__main__":
+    main()
--- a/data/.gitignore
+++ b/data/.gitignore
@ -0,0 +1,2 @@
+splits/
+!splits/.gitinclude
--- a/make.py
+++ b/make.py
@ -27,7 +27,7 @@ EXE_NAME = "main." + ("release" if ("RELEASE" in os.environ) else "debug") + ".e
 ROOT = os.path.dirname(os.path.realpath(__file__))

 if "RELEASE" in os.environ:
-    COMPILE_FLAGS[COMPILE_FLAGS.index("-O0")] = "-Ofast"
+    COMPILE_FLAGS[COMPILE_FLAGS.index("-O0")] = "-O3"
    COMPILE_FLAGS[COMPILE_FLAGS.index("-g3")] = "-g"
    COMPILE_FLAGS.append("-DRELEASE")
    LINK_FLAGS.append("-DRELEASE")
--- a/src/main.c
+++ b/src/main.c
@ -135,6 +135,92 @@ arg_error:
  exit(EXIT_FAILURE);
 }

+// Reports whether the first `row_len` entries of `lhs` and `candidate` agree.
+static _Bool CgRowsMatch(const data_t *lhs, const data_t *candidate,
+                         const size_t row_len) {
+  for (size_t k = 0; k < row_len; k++) {
+    if (lhs[k] != candidate[k]) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+// Decides whether `rhs` can be made equal to `lhs` under one of the
+// combinations enumerated by the supplied permutation generators and, when
+// both parties have the same number of inputs and of outputs, additionally
+// under a swap of the parties. Returns 1 on the first match, 0 once every
+// combination has been tried.
+//
+// `p_buf` and `cg_buf` are scratch buffers and are overwritten. Every
+// generator is reset on entry; on return the generators are left wherever the
+// enumeration stopped.
+static _Bool RowsEquivalent(
+    const size_t a_in, const size_t b_in, const size_t a_out,
+    const size_t b_out, size_t row_len, data_t *p_buf, data_t *cg_buf,
+    permutation_generator_t *a_in_perm, permutation_generator_t *b_in_perm,
+    permutation_generator_t *a_out_perms, permutation_generator_t *b_out_perms,
+    const data_t *restrict lhs, const data_t *restrict rhs) {
+  // Start every generator from a known state.
+  PermutationReset(a_in_perm);
+  PermutationReset(b_in_perm);
+  for (size_t k = 0; k < a_in; k++) {
+    PermutationReset(a_out_perms + k);
+  }
+  for (size_t k = 0; k < b_in; k++) {
+    PermutationReset(b_out_perms + k);
+  }
+
+  // If the number of output labels are the same, and the number of input
+  // labels is also the same, equality under party swapping is checked too.
+  // The condition is loop-invariant, so it is evaluated once here.
+  const _Bool parties_swappable = (a_in == b_in) && (a_out == b_out);
+
+  while (!a_in_perm->exhausted) {
+    PermutationReset(b_in_perm);
+    while (!b_in_perm->exhausted) {
+      ResetConditionalPermutations(a_out_perms, a_in);
+      while (!a_out_perms[a_in - 1].exhausted) {
+        ResetConditionalPermutations(b_out_perms, b_in);
+        while (!b_out_perms[b_in - 1].exhausted) {
+          // Write the permuted `rhs` into p_buf, convert it back into cg_buf,
+          // and compare that against `lhs`.
+          FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
+                    a_in_perm->permutation, b_in_perm->permutation, a_out_perms,
+                    b_out_perms);
+          FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
+          if (CgRowsMatch(lhs, cg_buf, row_len)) {
+            return 1;
+          }
+
+          if (parties_swappable) {
+            // Reuse the contents of p_buf: swap the parties in place, then
+            // convert and compare once more.
+            PSwapParties(a_out, a_in, p_buf);
+            FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
+            if (CgRowsMatch(lhs, cg_buf, row_len)) {
+              return 1;
+            }
+          }
+
+          AdvanceConditionalPermutations(b_out_perms, b_in);
+        }
+
+        AdvanceConditionalPermutations(a_out_perms, a_in);
+      }
+
+      PermutationNext(b_in_perm);
+    }
+
+    PermutationNext(a_in_perm);
+  }
+
+  return 0;
+}
+
+// Result of a reduction step (`UniqueInBlock` / `UniqueInSubsetPair`).
+//
+// The reduction functions receive the `unique` vector by value and push to it
+// with `stbds_arrput`, so the (possibly updated) pointer is handed back here
+// and the caller is expected to adopt it in place of the one it passed in.
+typedef struct {
+  // The updated pointer to the vector with the indices of the unique rows.
+  size_t *unique;
+  // The number of elements pushed to the vector.
+  size_t count;
+} reduction_result_t;
+
 // Calculates inequivalent rows out of a given block of the rows. A block is a
 // contiguous subset of rows, with indices
 // `block_start..(block_start+row_count)`.
@ -146,7 +232,6 @@ arg_error:
 //    a_out       Number of outputs for B
 //    matrix      The matrix whose rows are to be examined
 //    row_len     Length of a matrix row
-//    seen        Boolean buffer of size at least `row_count`
 //    p_buf       data_t buffer capable of holding a P representation row
 //    cg_buf      data_t buffer capable of holding a CG representation row
 //    a_in_perm   permutation_generator_t for permutations of A's input
@ -155,119 +240,83 @@ arg_error:
 //    b_out_perms permutation_generator_t* array for permutations of B's output
 //    block_start Where to start counting the indices from.
 //    row_count   Length of block
+//    unique      size_t stbds vector pointer
 //
-// After the function call, `seen[0..row_count]` holds whether each row is
-// redundant to a previous row or not.
+// The vector `unique` is only pushed to; it's the user's responsibility to
+// allocate or free it as needed.
 //
-// `seen`, `p_buf`, and `cg_buf` WILL be clobbered. They don't need to be
-// initialized, only allocated. It's the caller's responsibility to free these
-// buffers if applicable.
+// `row_count` MUST be greater than 0, otherwise behaviour is undefined.
+//
+// `p_buf`, and `cg_buf` WILL be clobbered. They don't need to be initialized,
+// only allocated. It's the caller's responsibility to free these buffers if
+// applicable.
 //
 // The permutation generators are expected to be correctly initialized; this
 // function only resets the permutation generators, it does NOT allocate them
 // or free them.
-static void UniqueInBlock(const size_t a_in, const size_t b_in,
-                          const size_t a_out, const size_t b_out,
-                          const matrix_t *matrix, size_t row_len, _Bool *seen,
-                          data_t *p_buf, data_t *cg_buf,
-                          permutation_generator_t *a_in_perm,
-                          permutation_generator_t *b_in_perm,
-                          permutation_generator_t *a_out_perms,
-                          permutation_generator_t *b_out_perms,
-                          const size_t block_start, const size_t row_count) {
-  for (size_t i = 0; i < row_count; i++) {
-    seen[i] = 0;
+static reduction_result_t UniqueInBlock(
+    const size_t a_in, const size_t b_in, const size_t a_out,
+    const size_t b_out, const matrix_t *matrix, size_t row_len, data_t *p_buf,
+    data_t *cg_buf, permutation_generator_t *a_in_perm,
+    permutation_generator_t *b_in_perm, permutation_generator_t *a_out_perms,
+    permutation_generator_t *b_out_perms, const size_t block_start,
+    const size_t row_count, size_t *unique) {
+  // `remaining` holds the block-relative indices of the rows that are not yet
+  // known to be equivalent to any representative chosen so far;
+  // `remaining_swap` collects the survivors of the round in progress.
+  size_t *remaining = NULL;
+  size_t *remaining_swap = NULL;
+  // Number of indices this call appended to `unique`.
+  size_t push_count = 0;
+
+  // First round: perform the first split of equivalence classes.
+  {
+    // The first row of the block is always the representative of a class.
+    stbds_arrput(unique, block_start);
+    push_count++;
+    data_t *root = matrix->head + block_start * matrix->row_len;
+
+    for (size_t rhs_i = 1; rhs_i < row_count; rhs_i++) {
+      data_t *rhs = matrix->head + (block_start + rhs_i) * matrix->row_len;
+      _Bool equivalent = RowsEquivalent(a_in, b_in, a_out, b_out, row_len,
+                                        p_buf, cg_buf, a_in_perm, b_in_perm,
+                                        a_out_perms, b_out_perms, root, rhs);
+      if (!equivalent) {
+        // Note: the index stored is relative to `block_start`.
+        stbds_arrput(remaining, rhs_i);
+      }
+    }
   }

-  for (size_t lhs_i = 0; lhs_i < row_count - 1; lhs_i++) {
-    if (seen[lhs_i]) {
-      continue;
-    }
+  // Remaining rounds: perform reduction
+  {
+    while (stbds_arrlenu(remaining) > 0) {
+      // Every row still in `remaining` was found inequivalent to all earlier
+      // representatives, so the popped row is the representative of a new
+      // class.
+      size_t lhs_i = stbds_arrpop(remaining);
+      stbds_arrput(unique, block_start + lhs_i);
+      push_count++;
+      data_t *lhs = matrix->head + (block_start + lhs_i) * matrix->row_len;

-    data_t *lhs = matrix->head + (block_start + lhs_i) * matrix->row_len;
+      for (size_t i = 0; i < stbds_arrlenu(remaining); i++) {
+        size_t rhs_i = remaining[i];
+        data_t *rhs = matrix->head + (block_start + rhs_i) * matrix->row_len;

-    for (size_t rhs_i = lhs_i + 1; rhs_i < row_count; rhs_i++) {
-      if (seen[rhs_i]) {
-        continue;
-      }
-
-      data_t *rhs = matrix->head + (block_start + rhs_i) * matrix->row_len;
-
-      PermutationReset(a_in_perm);
-      PermutationReset(b_in_perm);
-      for (size_t i = 0; i < a_in; i++) {
-        PermutationReset(a_out_perms + i);
-      }
-      for (size_t i = 0; i < b_in; i++) {
-        PermutationReset(b_out_perms + i);
-      }
-
-      while (!a_in_perm->exhausted) {
-        PermutationReset(b_in_perm);
-        while (!b_in_perm->exhausted) {
-          ResetConditionalPermutations(a_out_perms, a_in);
-          while (!a_out_perms[a_in - 1].exhausted) {
-            ResetConditionalPermutations(b_out_perms, b_in);
-            while (!b_out_perms[b_in - 1].exhausted) {
-              // Compare the two rows
-
-              FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
-                        a_in_perm->permutation, b_in_perm->permutation,
-                        a_out_perms, b_out_perms);
-
-              {
-                FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
-                _Bool equivalent = 1;
-                for (size_t i = row_len; i > 0; i--) {
-                  if (lhs[i - 1] != cg_buf[i - 1]) {
-                    equivalent = 0;
-                    break;
-                  }
-                }
-
-                if (equivalent) {
-                  seen[rhs_i] = 1;
-                  goto skip_permutations;
-                }
-              }
-
-              // If the number of output labels are the same, and the number
-              // of input labels is also the same, we can also check for
-              // equality under party swapping. I don't expect this
-              // conditional to be very penalizing because it's very
-              // predictable.
-              if (a_in == b_in && a_out == b_out) {
-                // Use the results in p_buf
-                PSwapParties(a_out, a_in, p_buf);
-                FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
-                _Bool equivalent = 1;
-                for (size_t i = row_len; i > 0; i--) {
-                  if (lhs[i - 1] != cg_buf[i - 1]) {
-                    equivalent = 0;
-                    break;
-                  }
-                }
-
-                if (equivalent) {
-                  seen[rhs_i] = 1;
-                  goto skip_permutations;
-                }
-              }
-              AdvanceConditionalPermutations(b_out_perms, b_in);
-            }
-
-            AdvanceConditionalPermutations(a_out_perms, a_in);
-          }
-
-          PermutationNext(b_in_perm);
+        _Bool equivalent = RowsEquivalent(a_in, b_in, a_out, b_out, row_len,
+                                          p_buf, cg_buf, a_in_perm, b_in_perm,
+                                          a_out_perms, b_out_perms, lhs, rhs);
+        if (!equivalent) {
+          // Still unclassified: keep it for the next round.
+          stbds_arrput(remaining_swap, rhs_i);
         }
-
-        PermutationNext(a_in_perm);
       }

-    skip_permutations:;
-    }  // For loop over rhs_i
-  }    // For loop over lhs_i
+      // Only the rows that survived this round are examined in the next one.
+      stbds_arrfree(remaining);
+      remaining = remaining_swap;
+      remaining_swap = NULL;
+    }
+  }
+
+  // `remaining` is empty here but may still own an allocation;
+  // `remaining_swap` was set to NULL at the end of the last round (or never
+  // assigned at all).
+  stbds_arrfree(remaining);
+  stbds_arrfree(remaining_swap);
+
+  reduction_result_t result = {
+      .unique = unique,
+      .count = push_count,
+  };
+
+  return result;
 }

 // Calculates inequivalent rows out of a given subset of the rows. The subset
@ -280,7 +329,6 @@ static void UniqueInBlock(const size_t a_in, const size_t b_in,
 //    a_out             Number of outputs for B
 //    matrix            The matrix whose rows are to be examined
 //    row_len           Length of a matrix row
-//    seen              Boolean buffer of size at least `second_row_count`
 //    p_buf             data_t buffer capable of holding a P representation row
 //    cg_buf            data_t buffer capable of holding a CG representation row
 //    a_in_perm         permutation_generator_t for permutations of A's input
@ -293,10 +341,13 @@ static void UniqueInBlock(const size_t a_in, const size_t b_in,
 //    first_row_count   Length of first_row_idxs[]
 //    second_row_idxs   Second array of indices of rows to consider
 //    second_row_count  Length of second_row_idxs[]
+//    unique            size_t stbds vector pointer
 //
+// The vector `unique` is only pushed to; it's the user's responsibility to
+// allocate or free it as needed.
 //
-// After the function call, `seen[..]` holds whether each row in the second
-// block is redundant to a row in the first block.
+// `first_row_count` and `second_row_count` MUST be greater than 0, otherwise
+// behaviour is undefined.
 //
 // `seen`, `p_buf`, and `cg_buf` WILL be clobbered. They don't need to be
 // initialized, only allocated. It's the caller's responsibility to free these
@ -305,104 +356,68 @@ static void UniqueInBlock(const size_t a_in, const size_t b_in,
 // The permutation generators are expected to be correctly initialized; this
 // function only resets the permutation generators, it does NOT allocate them
 // or free them.
-static void UniqueInSubsetPair(
+static reduction_result_t UniqueInSubsetPair(
     const size_t a_in, const size_t b_in, const size_t a_out,
     const size_t b_out, const matrix_t *matrix, const size_t row_len,
-    _Bool *seen, data_t *p_buf, data_t *cg_buf,
-    permutation_generator_t *a_in_perm, permutation_generator_t *b_in_perm,
-    permutation_generator_t *a_out_perms, permutation_generator_t *b_out_perms,
-    const size_t *first_row_idxs, const size_t first_row_count,
-    const size_t *second_row_idxs, const size_t second_row_count) {
-  for (size_t i = 0; i < second_row_count; i++) {
-    seen[i] = 0;
+    data_t *p_buf, data_t *cg_buf, permutation_generator_t *a_in_perm,
+    permutation_generator_t *b_in_perm, permutation_generator_t *a_out_perms,
+    permutation_generator_t *b_out_perms, const size_t *first_row_idxs,
+    const size_t first_row_count, const size_t *second_row_idxs,
+    const size_t second_row_count, size_t *unique) {
+  // This one is a little more complicated:
+  // Every two elements within `first_row_idxs` are inequivalent, by design, and
+  //  likewise for `second_row_idxs`. This means that if a row A in `first_row`
+  //  is equivalent to a row A' in `second_row`, it cannot be equivalent to any
+  //  other row in `second_row` by transitivity of the equivalence.
+  // Consequently, a first-block row that has found its match can be retired
+  //  from `classes`. Once `classes` is empty, every second-block row that is
+  //  still to be examined cannot match anything in the first block, so it is
+  //  a new class and MUST still be pushed to `unique`.
+
+  // Number of indices this call appended to `unique`.
+  size_t push_count = 0;
+  // First-block rows that have not yet been matched by a second-block row.
+  size_t *classes = NULL;
+
+  // All rows of the first block are kept unconditionally.
+  for (size_t i = 0; i < first_row_count; i++) {
+    size_t lhs_i = first_row_idxs[i];
+    stbds_arrput(unique, lhs_i);
+    push_count++;
+    stbds_arrput(classes, lhs_i);
   }

-  for (size_t lhs_i = 0; lhs_i < first_row_count; lhs_i++) {
-    data_t *lhs = matrix->head + first_row_idxs[lhs_i] * matrix->row_len;
-    for (size_t rhs_i = 0; rhs_i < second_row_count; rhs_i++) {
-      if (seen[rhs_i]) {
-        continue;
+  for (size_t i = 0; i < second_row_count; i++) {
+    size_t rhs_i = second_row_idxs[i];
+    data_t *rhs = matrix->head + rhs_i * matrix->row_len;
+
+    // Stays 0 when `classes` is empty (the inner loop then does not run), in
+    // which case the row is kept as a new class rather than being dropped.
+    _Bool equivalent = 0;
+
+    for (size_t class_i = 0; class_i < stbds_arrlenu(classes); class_i++) {
+      size_t lhs_i = classes[class_i];
+      data_t *lhs = matrix->head + lhs_i * matrix->row_len;
+
+      equivalent = RowsEquivalent(a_in, b_in, a_out, b_out, row_len, p_buf,
+                                  cg_buf, a_in_perm, b_in_perm, a_out_perms,
+                                  b_out_perms, lhs, rhs);
+
+      if (equivalent) {
+        // This class cannot match any other second-block row: retire it.
+        stbds_arrdel(classes, class_i);
+        break;
       }
+    }

-      data_t *rhs = matrix->head + second_row_idxs[rhs_i] * matrix->row_len;
+    if (!equivalent) {
+      stbds_arrput(unique, rhs_i);
+      push_count++;
+    }
+  }

-      // This may be unnecessary the first time around, but after
-      // goto skip_permutations, the generators are left in an undefined state.
-      PermutationReset(a_in_perm);
-      PermutationReset(b_in_perm);
-      for (size_t i = 0; i < a_in; i++) {
-        PermutationReset(a_out_perms + i);
-      }
-      for (size_t i = 0; i < b_in; i++) {
-        PermutationReset(b_out_perms + i);
-      }
+  stbds_arrfree(classes);

-      while (!a_in_perm->exhausted) {
-        PermutationReset(b_in_perm);
-        while (!b_in_perm->exhausted) {
-          ResetConditionalPermutations(a_out_perms, a_in);
-          while (!a_out_perms[a_in - 1].exhausted) {
-            ResetConditionalPermutations(b_out_perms, b_in);
-            while (!b_out_perms[b_in - 1].exhausted) {
-              // Compare the two rows
-              FromCgToP(a_out, b_out, a_in, b_in, rhs, p_buf,
-                        a_in_perm->permutation, b_in_perm->permutation,
-                        a_out_perms, b_out_perms);
+  // Hand back the vector pointer as well, since pushing may have changed it.
+  reduction_result_t result = {
+      .unique = unique,
+      .count = push_count,
+  };

-              {
-                FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
-                _Bool equivalent = 1;
-                for (size_t i = row_len; i > 0; i--) {
-                  if (lhs[i - 1] != cg_buf[i - 1]) {
-                    equivalent = 0;
-                    break;
-                  }
-                }
-
-                if (equivalent) {
-                  seen[rhs_i] = 1;
-                  goto skip_permutations;
-                }
-              }
-
-              // If the number of output labels are the same, and the number
-              // of input labels is also the same, we can also check for
-              // equality under party swapping. I don't expect this
-              // conditional to be very penalizing because it's very
-              // predictable.
-              if (a_in == b_in && a_out == b_out) {
-                // Use the results in p_buf
-                PSwapParties(a_out, a_in, p_buf);
-                FromPToCg(a_out, b_out, a_in, b_in, cg_buf, p_buf);
-                _Bool equivalent = 1;
-                for (size_t i = row_len; i > 0; i--) {
-                  if (lhs[i - 1] != cg_buf[i - 1]) {
-                    equivalent = 0;
-                    break;
-                  }
-                }
-
-                if (equivalent) {
-                  seen[rhs_i] = 1;
-                  goto skip_permutations;
-                }
-              }
-
-              AdvanceConditionalPermutations(b_out_perms, b_in);
-            }
-
-            AdvanceConditionalPermutations(a_out_perms, a_in);
-          }
-
-          PermutationNext(b_in_perm);
-        }
-
-        PermutationNext(a_in_perm);
-      }
-
-    skip_permutations:;
-    }  // For loop over rhs_i
-  }    // For loop over lhs_i
+  return result;
 }

 int main(int argc, char *argv[]) {
@ -441,7 +456,6 @@ int main(int argc, char *argv[]) {
  size_t *ends =
      NULL;  // Bin i contains elements with indices ends[i-1]..ends[i]

-  // TODO: Optimization: only one lock is needed
  pthread_rwlockattr_t attr;
  pthread_rwlockattr_init(&attr);
  pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
@ -452,12 +466,6 @@ int main(int argc, char *argv[]) {
    shared(stderr, args, a_out, b_out, a_in, b_in, row_len, matrix, row_count, \
               bin_size, bin_count, unique, ends, vector_lock)
  {
-    _Bool *seen = malloc(bin_size * sizeof(_Bool));
-    if (seen == NULL) {
-      fprintf(stderr, "Failed to allocate equivalence flag string. Aborting.");
-      exit(EXIT_FAILURE);
-    }
-
    data_t *p_buf = malloc(a_out * b_out * a_in * b_in * sizeof(data_t));
    data_t *cg_buf = malloc((((a_out - 1) * (b_out - 1) * a_in * b_in +
                              (a_out - 1) * a_in + (b_out - 1) * b_in) +
@ -508,23 +516,19 @@ int main(int argc, char *argv[]) {
        bin_i_size = bin_size;
      }

-      UniqueInBlock(a_in, b_in, a_out, b_out, &matrix, row_len, seen, p_buf,
-                    cg_buf, &a_in_perm, &b_in_perm, a_out_perms, b_out_perms,
-                    block_i_start, bin_i_size);
+      reduction_result_t result =
+          UniqueInBlock(a_in, b_in, a_out, b_out, &matrix, row_len, p_buf,
+                        cg_buf, &a_in_perm, &b_in_perm, a_out_perms,
+                        b_out_perms, block_i_start, bin_i_size, local_unique);

-      size_t unique_count = 0;
-      for (size_t i = 0; i < bin_i_size; i++) {
-        if (!seen[i]) {
-          stbds_arrput(local_unique, block_i_start + i);
-          unique_count++;
-        }
-      }
+      local_unique = result.unique;
+      size_t push_count = result.count;
      size_t ends_len = stbds_arrlenu(local_ends);
      if (ends_len == 0) {
-        stbds_arrput(local_ends, unique_count);
+        stbds_arrput(local_ends, push_count);
      } else {
        size_t last_end = stbds_arrlast(local_ends);
-        stbds_arrput(local_ends, last_end + unique_count);
+        stbds_arrput(local_ends, last_end + push_count);
      }

      fprintf(stderr, ".");
@ -587,28 +591,18 @@ int main(int argc, char *argv[]) {
        second_block_idxs = unique + ends[2 * pair_i];
        second_block_len = ends[2 * pair_i + 1] - ends[2 * pair_i];

-        UniqueInSubsetPair(a_in, b_in, a_out, b_out, &matrix, row_len, seen,
-                           p_buf, cg_buf, &a_in_perm, &b_in_perm, a_out_perms,
-                           b_out_perms, first_block_idxs, first_block_len,
-                           second_block_idxs, second_block_len);
+        reduction_result_t result = UniqueInSubsetPair(
+            a_in, b_in, a_out, b_out, &matrix, row_len, p_buf, cg_buf,
+            &a_in_perm, &b_in_perm, a_out_perms, b_out_perms, first_block_idxs,
+            first_block_len, second_block_idxs, second_block_len, local_unique);

-        {
-          size_t reduction_size = first_block_len;
-          for (size_t i = 0; i < first_block_len; i++) {
-            stbds_arrput(local_unique, first_block_idxs[i]);
-          }
-          for (size_t i = 0; i < second_block_len; i++) {
-            if (!seen[i]) {
-              stbds_arrput(local_unique, second_block_idxs[i]);
-              reduction_size++;
-            }
-          }
-          if (stbds_arrlenu(local_ends) == 0) {
-            stbds_arrput(local_ends, reduction_size);
-          } else {
-            size_t last_end = stbds_arrlast(local_ends);
-            stbds_arrput(local_ends, last_end + reduction_size);
-          }
+        local_unique = result.unique;
+        size_t reduction_size = result.count;
+        if (stbds_arrlenu(local_ends) == 0) {
+          stbds_arrput(local_ends, reduction_size);
+        } else {
+          size_t last_end = stbds_arrlast(local_ends);
+          stbds_arrput(local_ends, last_end + reduction_size);
        }

      }  // End of loop over pairs
@ -632,7 +626,7 @@ int main(int argc, char *argv[]) {
            stbds_arrput(local_ends, odd_block_len);
          }
        }
-        
+
        stbds_arrfree(unique);
        stbds_arrfree(ends);
        unique = NULL;
@ -729,7 +723,6 @@ int main(int argc, char *argv[]) {
    free(b_out_perms);
    free(cg_buf);
    free(p_buf);
-    free(seen);
  }  // End of omp parallel region

  // Free all the memory so the sanitizer is happy.
Author	SHA1	Message	Date
Miguel M	9671205c15	Aux script to split input data files into chunks	2023-06-06 16:04:30 +01:00
Miguel M	0ea106abcf	(Hopeful) improvements by using transitivity of equivalence	2023-06-06 16:03:09 +01:00