Threaded optical flow

Hi, I am trying to make a threaded optical flow system. The setup is that I will use a quad-split hardware device to combine 4 video feeds into one; I then split the image across four ofPixels objects, send each quadrant to its own thread for the optical flow, and then use the results in my main thread.

Using some other threaded examples I found, I was able to make a simple threaded optical flow class using ofxCv. The issue I am having is finding an efficient (i.e. fastest) way of passing the updated pixels to each thread every frame. If I make a version where each thread contains its own image and only outputs the flow data, it works great at 60 fps; but when I pass the pixels from the main thread to each of the four optical flow threads, I drop to 8 fps.

I have been reading up on threading and OF, but I am not sure what the bottleneck is here. For comparison, if I run optical flow on the same total number of pixels in a single thread, I get the same speed as with 4 threads (i.e. a single thread with one 640*480 image, as opposed to four threaded 320*240 images).

I am using ofxRuiThread; here is my class:

#pragma once

    #include "ofMain.h"

    #include "ofxRuiThread.h"


    class threadedOpticalFlow:public ofxRuiThread
    {
    public:
        
        
        ofxCv::FlowFarneback fb;
        ofxCv::FlowPyrLK lk;
        ofxCv::Flow* curFlow;

        
        threadedOpticalFlow()
        {

            curFlow = &fb;
        }
        
        void updateThread(ofPixels inputImage)
        {
            
                
                if(usefb) {
                    curFlow = &fb;
                    fb.setPyramidScale(fbPyrScale);
                    fb.setNumLevels(fbLevels);
                    fb.setWindowSize(fbWinSize);
                    fb.setNumIterations(fbIterations);
                    fb.setPolyN(fbPolyN);
                    fb.setPolySigma(fbPolySigma);
                    fb.setUseGaussian(fbUseGaussian);
                } else {
                    curFlow = &lk;
                    lk.setMaxFeatures(lkMaxFeatures);
                    lk.setQualityLevel(lkQualityLevel);
                    lk.setMinDistance(lkMinDistance);
                    lk.setWindowSize(lkWinSize);
                    lk.setMaxLevel(lkMaxLevel);
                }
                
                // you can use Flow polymorphically
                curFlow->calcOpticalFlow(inputImage);
            }
        
        
        /// Motion vectors from the most recent optical-flow step.
        ///
        /// Bug fix: in the original, when `usefb` was true BOTH return
        /// statements were commented out, so control fell off the end of a
        /// value-returning function — undefined behaviour in C++.
        /// ofxCv::FlowFarneback does not provide getMotion() (hence the
        /// commented-out attempts), so the Farneback path now returns an
        /// empty vector; use fb.getAverageFlow() from the caller if an
        /// aggregate flow value is needed instead.
        vector<ofVec2f> getMotionFromThread()
        {
            if(usefb) {
                // TODO: expose per-cell Farneback motion (e.g. via
                // fb.getFlowPosition) if individual vectors are required.
                return vector<ofVec2f>();
            } else {
                return lk.getMotion();
            }
        }

And in my app I have this

//--------------------------------------------------------------
// Configure the camera grabber and allocate one pixel buffer plus one
// texture per quadrant of the incoming quad-split feed.
void ofApp::setup(){
    camWidth  = 640;   // capture resolution requested from the grabber
    camHeight = 480;
    quadWidth  = camWidth  / 2;
    quadHeight = camHeight / 2;

    vidGrabber.setDeviceID(2);
    vidGrabber.setDesiredFrameRate(60);
    vidGrabber.initGrabber(camWidth, camHeight);

    // One RGB pixel buffer and matching texture for each of the 4 quadrants.
    for (int quad = 0; quad < 4; quad++) {
        videoQuads[quad].allocate(quadWidth, quadHeight, 3);
        videoTextures[quad].allocate(quadWidth, quadHeight, GL_RGB);
    }

    ofSetVerticalSync(true);
}

//--------------------------------------------------------------
// Grab a fresh camera frame, split it into four quadrants, run optical
// flow on each quadrant, and upload the quadrant pixels to its texture.
void ofApp::update(){
    ofBackground(100,100,100);

    vidGrabber.update();

    if (vidGrabber.isFrameNew()){
        for (int i = 0; i < 4; i++) {
            // Quadrant origin computed from the index, replacing the
            // original four-way switch that repeated the same cropTo call:
            // 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
            int cropX = (i % 2) * quadWidth;
            int cropY = (i / 2) * quadHeight;
            vidGrabber.getPixels().cropTo(videoQuads[i], cropX, cropY, quadWidth, quadHeight);

            // NOTE(review): as written this call executes on the main
            // thread; if it is meant to hand work off to the flow thread,
            // confirm ofxRuiThread actually dispatches it asynchronously —
            // a synchronous call here would explain the missing speedup.
            threadedOpticalFlow[i].updateThread(videoQuads[i]);

            videoTextures[i].loadData(videoQuads[i].getData(), quadWidth, quadHeight, GL_RGB);
        }
    }
}

//--------------------------------------------------------------
// Draw the four quadrant textures side by side, overlay each one's flow
// field, then print the current frame rate.
void ofApp::draw(){

    for (int quad = 0; quad < 4; quad++) {
        ofPushView();
        ofTranslate(quadWidth * quad, 0);
        videoTextures[quad].draw(0, 0, quadWidth, quadHeight);
        threadedOpticalFlow[quad].curFlow->draw();
        ofPopView();
    }

    ofSetHexColor(0xffffff);
    ofDrawBitmapString(ofToString(ofGetFrameRate()), 10, 1000);
}