1 op { 2 graph_op_name: "LogUniformCandidateSampler" 3 in_arg { 4 name: "true_classes" 5 description: <<END 6 A batch_size * num_true matrix, in which each row contains the 7 IDs of the num_true target_classes in the corresponding original label. 8 END 9 } 10 out_arg { 11 name: "sampled_candidates" 12 description: <<END 13 A vector of length num_sampled, in which each element is 14 the ID of a sampled candidate. 15 END 16 } 17 out_arg { 18 name: "true_expected_count" 19 description: <<END 20 A batch_size * num_true matrix, representing 21 the number of times each candidate is expected to occur in a batch 22 of sampled candidates. If unique=true, then this is a probability. 23 END 24 } 25 out_arg { 26 name: "sampled_expected_count" 27 description: <<END 28 A vector of length num_sampled, for each sampled 29 candidate representing the number of times the candidate is expected 30 to occur in a batch of sampled candidates. If unique=true, then this is a 31 probability. 32 END 33 } 34 attr { 35 name: "num_true" 36 description: <<END 37 Number of true labels per context. 38 END 39 } 40 attr { 41 name: "num_sampled" 42 description: <<END 43 Number of candidates to randomly sample. 44 END 45 } 46 attr { 47 name: "unique" 48 description: <<END 49 If unique is true, we sample with rejection, so that all sampled 50 candidates in a batch are unique. This requires some approximation to 51 estimate the post-rejection sampling probabilities. 52 END 53 } 54 attr { 55 name: "range_max" 56 description: <<END 57 The sampler will sample integers from the interval [0, range_max). 58 END 59 } 60 attr { 61 name: "seed" 62 description: <<END 63 If either seed or seed2 is set to be non-zero, the random number 64 generator is seeded by the given seed. Otherwise, it is seeded by a 65 random seed. 66 END 67 } 68 attr { 69 name: "seed2" 70 description: <<END 71 A second seed to avoid seed collision. 
72 END 73 } 74 summary: "Generates labels for candidate sampling with a log-uniform distribution." 75 description: <<END 76 See explanations of candidate sampling and the data formats at 77 go/candidate-sampling. 78 79 For each batch, this op picks a single set of sampled candidate labels. 80 81 The advantages of sampling candidates per-batch are simplicity and the 82 possibility of efficient dense matrix multiplication. The disadvantage is that 83 the sampled candidates must be chosen independently of the context and of the 84 true labels. 85 END 86 } 87