check transfer  0.1
Check data transfer for SDAccel OpenCL application
tf_checktransferout.cpp
Go to the documentation of this file.
1 
2 #include <stdio.h>
3 #include "exceptinfo.h"
4 #include "tf_checktransferout.h"
5 
6 #include "table_engine.h"
7 
8 #include "utypes.h"
9 #include "ipc.h"
10 
11 #include "tf_device.h"
12 #include "parse_cmd.h"
13 #include <vector>
14 
15 
16 //!< Internal data for TF_CheckTransferOut
18 
19  cl_uint RowNumber; //!< Number of first row in the table
20  cl_uint BlockWr; //!< Count of written blocks
21  cl_uint BlockRd; //!< Count of read blocks
22  cl_uint BlockOk; //!< Count of correct blocks
23  cl_uint BlockError; //!< Count of incorrect blocks
24  cl_uint sizeBlock; //!< Size of block [bytes]
25  cl_int sizeOfuint16; //!< Size of block in 512-bit words
26  cl_uint Sig; //!< Signature for status buffer
27  float VelocityCurrent; //!< current speed (on 4 secund interval)
28  float VelocityAverage; //!< average speed (from test start)
29 
30  float VelocityCurMax; //!< maximum of VelocityCurrent
31  float VelocityCurMin; //!< minimum of VelocityCurrent
32 
33  cl_uint metricMode; //!< 0 - binary: 1MB=2^10 bytes, 1 - decimal: 1MB=10^6 bytes
34  cl_uint mbSize; //!< bytes count in 1MB
35 
36  clock_t startTick; //!< Number of start clock();
37  clock_t lastTick; //!< Number of last clock()
38  cl_uint lastBlock; //!< Number BlockWr for lastTick
39  float testTime; //!< Time from test start
40 
41 
42  time_t startTime; //!< time of start main test cycle
43  time_t lastTime; //!< time of last interval
44 
45  cl_ulong dataOut; //!< current data for output
46  cl_ulong dataExpect; //!< expect data from input
47 
48  char *kernelName; //!< kernel name
49 
50  TF_Device *pDevice; //!< OpenCL device and program
51 
52  cl::Kernel krnl_read; //!< OpenCL kernel for read data
53  cl::Kernel krnl_calculate; //!< OpenCL kernel for calculate
54 
55  cl::CommandQueue *q0; //!< Pointer to OpenCL command queue
56  cl::CommandQueue *q1; //!< Pointer to OpenCL command queue
57 
58  std::string deviceName; //!< OpenCL device name
59 
60  cl_uint *pBufOut[2]; //!< pointers to buffers in the host memory
61 
62  cl::Buffer *pBuffer[2]; //!< pointers to buffers in the device memory
63 
64  cl::Buffer *dpStatus; //!< pointer to status buffer in the device memory
65 
66  cl_uint *pStatus; //!< pointer to status buffer in the host memory
67 
68  cl_ulong flagGetStatus; //!< 1 - request for get status information
69 
70  cl_ulong8 arrayExpect; //!< expect values for data block
71 
73  {
74  BlockWr=0;
75  BlockRd=0;
76  BlockOk=0;
77  BlockError=0;
78  RowNumber=0;
79 
80  lastBlock = 0;
81  testTime = 0;
82 
83  dataOut=dataExpect=0xA0000000;
84 
85  kernelName = "check_cnt_m2a";
86 
87 
88  pBufOut[0]=NULL;
89  pBufOut[1]=NULL;
90 
91  pBuffer[0]=NULL;
92  pBuffer[1]=NULL;
93 
94  pStatus = NULL;
95  dpStatus = NULL;
96 
97  q0=NULL;
98  q1=NULL;
99 
100  flagGetStatus=0;
101  VelocityCurMax=0;
102  VelocityCurMin=0;
103  VelocityAverage=0;
104  VelocityCurrent=0;
105 
106  sizeOfuint16=0;
107  };
108 
110  {
111  }
112 
113 
114 };
115 
116 //! Constructor
117 /**
118  *
119  * \param argc Number of arguments
120  * \parma argv Pointer of argumnts
121  *
122  *
123  *
124  * arguments of command line:
125  *
126  * -size <n> : size block of kilobytes, default 64
127  * -metric <n> : 0 - binary: 1MB=2^10=1024*1024=1048576 bytes,
128  * 1 - decimal: 1MB=10^6=1000*1000=1000000 bytes,
129  * default 0
130  *
131  */
132 TF_CheckTransferOut::TF_CheckTransferOut( TableEngine *pTable, TF_Device *pDevice, int argc, char **argv) : TF_TestThread( pTable, argc, argv )
133 {
135 
136  td->sizeBlock = 1024 * GetFromCommnadLine( argc, argv, "-size", 64 );
137 
138  td->metricMode = GetFromCommnadLine( argc, argv, "-metric", 0 );
139  if( 0==td->metricMode )
140  td->mbSize = 1024*1024;
141  else
142  td->mbSize = 1000000;
143 
144  td->pDevice = pDevice;
145 
146 }
147 
148 //! Destructor
150 {
151 
152  free( td->pBufOut[0] ); td->pBufOut[0]=NULL;
153  free( td->pBufOut[1] ); td->pBufOut[1]=NULL;
154  free( td->pStatus ); td->pStatus=NULL;
155 
156 
157  delete td->pBuffer[0]; td->pBuffer[0]=NULL;
158  delete td->pBuffer[1]; td->pBuffer[1]=NULL;
159  delete td->dpStatus; td->dpStatus=NULL;
160 
161 
162  delete td; td=NULL;
163 }
164 
165 //! Prepare test
166 /**
167  * This function executed before main body of test.
168  *
169  *
170  */
172 {
173  printf( "\n");
174  printf( "TF_CheckTransferOut::%s\n\n", __FUNCTION__ );
175 
176  cl::Kernel krnl_read( td->pDevice->program, "check_read_input" );
177  td->krnl_read = krnl_read;
178 
179  cl::Kernel krnl_calculate( td->pDevice->program, "check_cnt_m2a" );
180  td->krnl_calculate = krnl_calculate;
181 
182 
183  td->sizeOfuint16 = td->sizeBlock/64; // count of words by 512 bits
184 
185 
186  printf( "Alloc host memory 2 x %d ", td->sizeBlock );
187  {
188  void *ptr=(void*)0xAAAA;
189  int err;
190  err = posix_memalign( &ptr, 4096, td->sizeBlock );
191  if( err )
192  throw except_info( "%s - memory allocation error ", __FUNCTION__);
193  td->pBufOut[0] = (cl_uint*) ptr;
194  //printf( "ptr=%p\n", ptr );
195 
196  err = posix_memalign( &ptr, 4096, td->sizeBlock );
197  if( err )
198  throw except_info( "%s - memory allocation error ", __FUNCTION__);
199  td->pBufOut[1] = (cl_uint*) ptr;
200 
201  //printf( "ptr=%p\n", ptr );
202 
203 
204 
205  err = posix_memalign( &ptr, 4096, 16384 );
206  if( err )
207  throw except_info( "%s - memory allocation error ", __FUNCTION__);
208  td->pStatus = (cl_uint*) ptr;
209 
210 
211  }
212  printf( " - Ok\n" );
213 
214  printf( "Alloc device memory 2 x %d (DDR0 & DDR1) ", td->sizeBlock );
215  {
216  cl::Buffer *pBuf;
217 
218  cl_mem_ext_ptr_t input_buffer_ext;
219 
220  input_buffer_ext.flags = XCL_MEM_DDR_BANK0;
221  input_buffer_ext.obj = NULL;
222  input_buffer_ext.param = 0;
223 
224  pBuf = new cl::Buffer( td->pDevice->context,
225  CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX,
226  td->sizeBlock,
227  &input_buffer_ext
228  );
229  td->pBuffer[0] = pBuf;
230  }
231 
232  {
233  cl::Buffer *pBuf;
234 
235  cl_mem_ext_ptr_t input_buffer_ext;
236 
237  input_buffer_ext.flags = XCL_MEM_DDR_BANK0;
238  input_buffer_ext.obj = NULL;
239  input_buffer_ext.param = 0;
240 
241  pBuf = new cl::Buffer( td->pDevice->context,
242  CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX,
243  td->sizeBlock,
244  &input_buffer_ext
245  );
246  td->pBuffer[1] = pBuf;
247  }
248 
249  {
250  cl::Buffer *pBuf;
251 
252  cl_mem_ext_ptr_t input_buffer_ext;
253 
254  input_buffer_ext.flags = XCL_MEM_DDR_BANK0;
255  input_buffer_ext.obj = NULL;
256  input_buffer_ext.param = 0;
257 
258  pBuf = new cl::Buffer( td->pDevice->context,
259  CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX,
260  16384,
261  &input_buffer_ext
262  );
263  td->dpStatus = pBuf;
264 
265 
266  }
267 
268 
269  {
270  cl_int err0;
271  cl_int err1;
272 
273  cl_command_queue_properties properties0 = CL_QUEUE_PROFILING_ENABLE;
274  cl_command_queue_properties properties1 = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
275 
276  td->q0 = new cl::CommandQueue(td->pDevice->context, td->pDevice->device, properties0, &err0 );
277  td->q1 = new cl::CommandQueue(td->pDevice->context, td->pDevice->device, properties1, &err1 );
278 
279  if( CL_SUCCESS != err0 )
280  throw except_info( "%s - Error for create CommandQueue q0; err=%d ", __FUNCTION__, err0 );
281 
282  if( CL_SUCCESS != err1 )
283  throw except_info( "%s - Error for create CommandQueue q1; err=%d ", __FUNCTION__, err1 );
284 
285  }
286  {
287  for( int ii=0; ii<512; ii++ )
288  {
289  td->pStatus[ii]=0;
290  }
291  td->pStatus[0]=0xBB66;
292 
293  cl_ulong val=td->dataOut;
294  cl_ulong *ptr = (cl_ulong *)&(td->pStatus[16]);
295 
296  // Set initial value
297  for( int ii=0; ii<8; ii++ )
298  {
299  *ptr++ = val++;
300  }
301 
302  // Set add const
303  for( int ii=0; ii<8; ii++ )
304  {
305  *ptr++=8;
306  }
307 
308  cl_int ret=td->q0->enqueueWriteBuffer(
309  *(td->dpStatus),
310  CL_TRUE,
311  0,
312  4096,
313  td->pStatus,
314  NULL,
315  NULL
316  );
317  }
318 
319  printf( " - Ok\n" );
320 
321 }
322 
323 //! Free any resource
325 {
326  printf( "TF_CheckTransferOut::%s\n", __FUNCTION__ );
327 
328  delete td->q0; td->q0=NULL;
329  delete td->q1; td->q1=NULL;
330 
331 
332 }
333 
334 //! Show results of test
336 {
337  printf( "\nTF_CheckTransferOut::%s\n\n", __FUNCTION__ );
338  //GetStatus();
339 
340  int flag_error=0;
341 
342  if( 0xAA55==td->Sig )
343  {
344  printf( "Sig=0xAA55 - Ok\n");
345  } else
346  {
347  printf( "Sig=0x%X - Error, expect 0xAA55\n", td->Sig );
348  flag_error=1;
349  }
350  printf( "BlockWr = %d\n", td->BlockWr );
351  printf( "BlockRd = %d\n", td->BlockRd );
352  printf( "BlockOK = %d\n", td->BlockOk );
353  printf( "BlockError = %d\n\n", td->BlockError );
354 
355  printf( "Size of block = %u \n\n", td->sizeBlock);
356 
357  printf( "Test time = %-.0f s\n\n", td->testTime);
358 
359  printf( "VelocityAverage = %10.1f MB/s\n", td->VelocityAverage );
360  printf( "VelocityCurrentMax = %10.1f MB/s\n", td->VelocityCurMax );
361  printf( "VelocityCurrentMin = %10.1f MB/s\n\n", td->VelocityCurMin );
362 
363  if( 0==td->metricMode)
364  printf( " 1 MB = 2^10 = 1024*1024 = 1048576 bytes\n");
365  else
366  printf( " 1 MB = 10^6 = 1000000 bytes\n");
367 
368 
369 
370  if( td->BlockOk!=td->BlockRd )
371  flag_error++;
372 
373  if( 0 == td->BlockRd )
374  flag_error++;
375 
376  if( 0 != td->BlockError )
377  flag_error++;
378 
379  if( 0==flag_error )
380  printf( "\nTest finished successfully\n\n" );
381  else
382  printf( "\nTest finished with errors\n\n" );
383 
384 }
385 
386 //! Show table during test executing
388 {
389  m_pTemplateEngine->SetValueTable( td->RowNumber, 1, (unsigned)td->BlockWr, "%10d" );
390  m_pTemplateEngine->SetValueTable( td->RowNumber, 2, (unsigned)td->BlockRd, "%10d" );
391 
392  m_pTemplateEngine->SetValueTable( td->RowNumber, 3, (unsigned)td->BlockOk, "%10d" );
393  m_pTemplateEngine->SetValueTable( td->RowNumber, 4, (unsigned)td->BlockError, "%10d" );
396 
397  m_pTemplateEngine->SetValueTable( td->RowNumber, 0, "OUT:%7.1f", td->testTime );
398 
399 
400  td->flagGetStatus=1;
401 }
402 
403 
404 //! Wait for complete data transfer
406 {
407  event.wait();
408 }
409 
410 //! Start data transfer
411 void TF_CheckTransferOut::StartWriteBuf( cl::Buffer *pBufDevice, cl_uint *pHost, cl::Event &event )
412 {
413  td->q0->enqueueWriteBuffer(
414  *(pBufDevice),
415  CL_FALSE,
416  0,
417  td->sizeBlock,
418  pHost,
419  NULL,
420  &event
421  );
422 
423  td->BlockWr++;
424 }
425 
426 //! Start kernel for buffer
427 void TF_CheckTransferOut::StartCalculateBuf( cl::Buffer *pBufDevice, cl::Event &event )
428 {
429 
430  cl::NDRange globalNDR(1,1,1);
431  cl::NDRange localNDR(1,1,1);
432  cl::NDRange offsetNDR=cl::NullRange;
433 
434 
435  td->krnl_calculate.setArg( 0, *(td->dpStatus) );
436  td->krnl_calculate.setArg( 1, td->sizeOfuint16 );
437 
438  td->q1->enqueueNDRangeKernel(
440  offsetNDR,
441  globalNDR,
442  localNDR,
443  NULL, //&events0,
444  &event //&eventCompletionExecuting0
445 
446  );
447 
448  td->krnl_read.setArg( 0, *pBufDevice );
449  td->krnl_read.setArg( 1, td->sizeOfuint16 );
450 
451  td->q1->enqueueNDRangeKernel(
452  td->krnl_read,
453  offsetNDR,
454  globalNDR,
455  localNDR,
456  NULL, // &events0,
457  NULL // &eventCompletionExecuting0
458 
459  );
460 }
461 
462 //! Wait for complete calculate
464 {
465  event.wait();
466 }
467 
468 
469 //! Main body of test
471 {
473 
474 
475  cl::Event eventCompletionTransfer0;
476  cl::Event eventCompletionTransfer1;
477  cl::Event eventCompletionExecuting0;
478  cl::Event eventCompletionExecuting1;
479  cl_int ret;
480 
481  std::vector<cl::Event> events0;
482  std::vector<cl::Event> events1;
483 
484  int flag_wait=0;
485  int flag_first_velocity=1;
486 
487 
488  td->lastTime = td->startTime = time(NULL);
489 
490  // set buffer 0 on host - long time operation
491  SetBuffer( td->pBufOut[0] );
492 
493  // start data transfer for buffer 0 on device - quick time
494  StartWriteBuf( td->pBuffer[0], td->pBufOut[0], eventCompletionTransfer0 );
495 
496  // set buffer 1 on host - long time operation
497  SetBuffer( td->pBufOut[1] );
498 
499  for( ; ; )
500  {
501  if( this->m_isTerminate )
502  break;
503 
504 
505  if( td->flagGetStatus )
506  {
507  GetStatus();
508  td->flagGetStatus=0;
509  }
510 
511  // Main body
512 
513  // wait complete transfer 0 - first wait
514  WaitForTransferBufComplete( eventCompletionTransfer0 );
515 
516  if( flag_wait )
517  // wait for buffer1 processing
518  WaitForCalculateComplete( eventCompletionExecuting1 );
519 
520  // start data transfer for buffer 1 on device - quick time
521  StartWriteBuf( td->pBuffer[1], td->pBufOut[1], eventCompletionTransfer1 );
522 
523 
524  // start check buffer 0 on device - quick time
525  StartCalculateBuf( td->pBuffer[0], eventCompletionExecuting0 );
526 
527  // set buffer 0 on host - long time operation
528  SetBuffer( td->pBufOut[0] );
529 
530  // wait for complete transfer 0 - second wait
531  WaitForTransferBufComplete( eventCompletionTransfer1 );
532 
533  // wait for buffer0 processing
534  WaitForCalculateComplete( eventCompletionExecuting0 );
535 
536  // start data transfer for buffer 0 on device - quick time
537  StartWriteBuf( td->pBuffer[0], td->pBufOut[0], eventCompletionTransfer0 );
538 
539 
540  // start check buffer 1 on device - quick time
541  StartCalculateBuf( td->pBuffer[1], eventCompletionExecuting1 );
542 
543  // set buffer 1 on host - long time operation
544  SetBuffer( td->pBufOut[1] );
545 
546  flag_wait=1;
547 
548 
549 
550  time_t currentTime = time(NULL);
551  time_t timeFromStart = currentTime - td->startTime;
552  time_t diff = currentTime - td->lastTime;
553  td->testTime = timeFromStart;
554 
555  if( diff >= 4 )
556  {
557  double velocity = (1.0L*(td->BlockWr-td->lastBlock)*td->sizeBlock)/diff;
558 
559  td->VelocityCurrent = velocity / (td->mbSize);
560 
561 
562  velocity = 1.0L*(td->BlockWr)*td->sizeBlock/timeFromStart;
563 
564  td->VelocityAverage = velocity / (td->mbSize);
565 
566  td->lastTime = currentTime;
567  td->lastBlock = td->BlockWr;
568 
569  if( 1==flag_first_velocity )
570  {
573  flag_first_velocity=0;
574  } else
575  {
578 
581 
582  }
583  }
584  }
585 
586  GetStatus();
587 
588 }
589 
590 //! set test data in buffer
591 void TF_CheckTransferOut::SetBuffer( cl_uint *ptr )
592 {
593  cl_ulong *dst = (cl_ulong*) ptr;
594  cl_uint count = td->sizeBlock / sizeof(cl_ulong) / 16;
595  cl_ulong val=td->dataOut;
596  for( cl_uint ii=0; ii<count; ii++ )
597  {
598  *dst++=val++;
599  *dst++=val++;
600  *dst++=val++;
601  *dst++=val++;
602  *dst++=val++;
603  *dst++=val++;
604  *dst++=val++;
605  *dst++=val++;
606  *dst++=val++;
607  *dst++=val++;
608  *dst++=val++;
609  *dst++=val++;
610  *dst++=val++;
611  *dst++=val++;
612  *dst++=val++;
613  *dst++=val++;
614  }
615  td->dataOut=val;
616 }
617 
618 //! check data in the buffer
620 {
621  cl_ulong *src = (cl_ulong*)ptr;
622  cl_uint count = td->sizeBlock / sizeof(cl_uint);
623  cl_ulong val=td->dataExpect;
624  cl_ulong di;
625  cl_ulong flag_error=0;
626  for( int ii=0; ii<count; ii++ )
627  {
628  di = *src++;
629 
630  if( di!=val )
631  {
632  flag_error++;
633  }
634  val++;
635  }
636  td->dataExpect=val;
637 
638  if( 0==flag_error )
639  td->BlockOk++;
640  else
641  td->BlockError++;
642 }
643 
644 //! Read status information from device
646 {
647  if( NULL==td->pStatus )
648  return;
649 
650 
651  cl_int ret=td->q0->enqueueReadBuffer(
652  *(td->dpStatus),
653  CL_TRUE,
654  0,
655  512,
656  td->pStatus,
657  NULL,
658  NULL
659  );
660 
661  td->Sig = td->pStatus[0];
662  td->BlockRd = td->pStatus[1];
663  td->BlockOk = td->pStatus[2];
664  td->BlockError = td->pStatus[3];
665 
666 }
cl_uint * pBufOut[2]
pointers to buffers in the host memory
cl_uint sizeBlock
Size of block [bytes].
cl::Program program
OpenCL program.
Definition: tf_device.h:28
float VelocityCurMin
minimum of VelocityCurrent
cl::CommandQueue * q0
Pointer to OpenCL command queue.
cl::Kernel krnl_read
OpenCL kernel for read data.
float VelocityAverage
average speed (from test start)
cl_uint BlockWr
Count of written blocks.
cl_uint * pStatus
pointer to status buffer in the host memory
except_info_t except_info(const char *fmt,...)
Definition: exceptinfo.cpp:25
TableEngine * m_pTemplateEngine
Definition: tf_test.h:21
virtual void PrepareInThread()
Prepare test.
virtual void CleanupInThread()
Free any resource.
virtual void StepTable()
Show table during test executing.
~TF_CheckTransferOut()
Destructor.
cl_uint Sig
Signature for status buffer.
cl_ulong8 arrayExpect
expect values for data block
cl_uint BlockRd
Count of read blocks.
void CheckBuffer(cl_uint *ptr)
check data in the buffer
float VelocityCurrent
current speed (on 4 second interval)
cl::Buffer * pBuffer[2]
pointers to buffers in the device memory
cl_uint lastBlock
Number BlockWr for lastTick.
cl_int sizeOfuint16
Size of block in 512-bit words.
Base class for application with thread.
Definition: tf_testthread.h:23
virtual void Run()
Main body of test.
cl_ulong dataOut
current data for output
float VelocityCurMax
maximum of VelocityCurrent
cl_uint BlockError
Count of incorrect blocks.
clock_t lastTick
Number of last clock()
cl_ulong dataExpect
expect data from input
virtual int AddRowTable()=0
virtual void GetResultInThread()
Show results of test.
cl::Context context
OpenCL context.
Definition: tf_device.h:27
cl_ulong flagGetStatus
1 - request for get status information
cl_uint BlockOk
Count of correct blocks.
cl::Buffer * dpStatus
pointer to status buffer in the device memory
void GetStatus(void)
Read status information from device.
int GetFromCommnadLine(int argc, char **argv, const char *name, int defValue)
Get integer value from command line.
Definition: parse_cmd.cpp:29
cl::CommandQueue * q1
Pointer to OpenCL command queue.
time_t startTime
time of start main test cycle
virtual int SetValueTable(unsigned nRow, unsigned nColumn, const char *fmt,...)=0
common data for OpenCL device
Definition: tf_device.h:17
cl_uint metricMode
0 - binary: 1MB=2^10 bytes, 1 - decimal: 1MB=10^6 bytes
TF_Device * pDevice
OpenCL device and program.
TF_CheckTransferOut_task_data * td
Internal data for TF_CheckTransferOut
cl_uint RowNumber
Number of first row in the table.
float testTime
Time from test start.
time_t lastTime
time of last interval
std::string deviceName
OpenCL device name.
void SetBuffer(cl_uint *ptr)
set test data in buffer
cl::Device device
OpenCL device.
Definition: tf_device.h:26
cl::Kernel krnl_calculate
OpenCL kernel for calculate.
TF_CheckTransferOut(TableEngine *pTable, TF_Device *pDevice, int argc, char **argv)
Constructor.
void WaitForCalculateComplete(cl::Event &event)
Wait for complete calculate.
clock_t startTick
Number of start clock();.
cl_uint mbSize
bytes count in 1MB
Internal data for TF_CheckTransferOut
void WaitForTransferBufComplete(cl::Event &event)
Wait for complete data transfer.
void StartWriteBuf(cl::Buffer *pDevice, cl_uint *pHost, cl::Event &event)
Start data transfer.
void StartCalculateBuf(cl::Buffer *pDevice, cl::Event &event)
Start kernel for buffer.