Writing a DeferredCompositeOperator

/**
 * Deferred composite operator that appends a long-typed ID field to every record
 * flowing from its input port to its output port.
 * <p>
 * The ID values are produced by a {@link DeriveFields} operator that is wired in at
 * deferred composition time, using a sequence that starts at the partition's ID and
 * advances by the total partition count.
 */
public final class AssignUniqueIDs extends DeferredCompositeOperator implements RecordPipelineOperator {

    // Required: every input port is declared as a field.
    private final RecordPort input = newRecordInput("input");

    // Required: every output port is declared as a field.
    private final RecordPort output = newRecordOutput("output");

    // Required: every property is declared as a field. The ID field is named "id" unless overridden.
    private String fieldName = "id";

    /**
     * Creates an operator that writes its IDs to a field named "id".
     * <p>
     * Required: all operators must have a default constructor.
     */
    public AssignUniqueIDs() {
    }

    /**
     * Creates an operator that writes its IDs to the given field.
     * <p>
     * Optional: convenience constructor.
     *
     * @param fieldName the name of the id output field
     */
    public AssignUniqueIDs(String fieldName) {
        this.fieldName = fieldName;
    }

    // ------------------------------------------------------------------
    // Required: a getter and a setter for each property
    // ------------------------------------------------------------------

    /**
     * Reports which field receives the generated IDs.
     *
     * @return the name of the id output field
     */
    public String getFieldName() {
        return fieldName;
    }

    /**
     * Chooses which field receives the generated IDs.
     *
     * @param fieldName the name of the id output field
     */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    // ------------------------------------------------------------------
    // Required: a getter for each port
    // ------------------------------------------------------------------

    /**
     * Returns the port the source records are read from.
     *
     * @return the record input port
     */
    @Override
    public RecordPort getInput() {
        return input;
    }

    /**
     * Returns the port the ID-augmented records are written to.
     *
     * @return the record output port
     */
    @Override
    public RecordPort getOutput() {
        return output;
    }

    /**
     * Validates the configuration and declares parallelism, output type, and output
     * ordering/distribution for this operator.
     *
     * @param ctx the metadata context the declarations are made against
     */
    @Override
    protected void computeMetadata(StreamingMetadataContext ctx) {

        // Best practice: validate properties before declaring anything.
        PropertyUtil.checkNotEmpty("fieldName", fieldName);

        // Required: declare parallelizability; here our parallelism is based on the source.
        ctx.parallelize(NEGOTIATE_BASED_ON_SOURCE);

        // Required: declare the output type; here it is the input type merged with
        // one additional long field that holds the ID.
        output.setType(ctx, TypeUtil.mergeTypes(input.getType(ctx), record(LONG(fieldName))));

        // Best practice: declare output ordering and distribution. Only a field is
        // appended, so both are passed through from the source unchanged.
        output.setOutputDataOrdering(ctx, input.getSourceDataOrdering(ctx));
        output.setOutputDataDistribution(ctx, input.getSourceDataDistribution(ctx));
    }

    /**
     * Builds the sub-graph for this partition: a single {@link DeriveFields} operator
     * placed between this operator's input and output ports.
     *
     * @param ctx the deferred composition context used to add and connect operators
     */
    @Override
    protected void compose(DeferredCompositionContext ctx) {

        // Partition information is available at execution time and at deferred composition time.
        PartitionInstanceInfo partition = ctx.getAssignment().getPartitionInfo();

        // Add the DeriveFields operator to the graph and feed it from our input.
        DeriveFields idAppender = ctx.add(new DeriveFields(idDerivationFor(partition)));
        ctx.connect(input, idAppender.getInput());

        // Required: connect all output ports; here DeriveFields' output becomes ours.
        ctx.connect(idAppender.getOutput(), output);
    }

    /**
     * Builds the derivation that fills the ID field for one partition.
     *
     * @param partition the partition the sequence is being built for
     * @return a derivation assigning the sequence values to the configured field
     */
    private FieldDerivation idDerivationFor(PartitionInstanceInfo partition) {
        // The sequence starts at this partition's ID...
        long firstId = partition.getPartitionID();

        // ...and steps by the total number of partitions, so that (assuming partition IDs
        // are distinct values below the partition count) partitions never share a value.
        long stride = partition.getPartitionCount();

        return FieldDerivation.derive(fieldName, Sequences.sequence(firstId, stride));
    }

}