You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

508 lines
18 KiB

  1. /*
  2. Copyright 2005-2013 Intel Corporation. All Rights Reserved.
  3. This file is part of Threading Building Blocks.
  4. Threading Building Blocks is free software; you can redistribute it
  5. and/or modify it under the terms of the GNU General Public License
  6. version 2 as published by the Free Software Foundation.
  7. Threading Building Blocks is distributed in the hope that it will be
  8. useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  9. of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with Threading Building Blocks; if not, write to the Free Software
  13. Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. As a special exception, you may use this file as part of a free software
  15. library without restriction. Specifically, if other files instantiate
  16. templates or use macros or inline functions from this file, or you compile
  17. this file and link it with other files to produce an executable, this
  18. file does not by itself cause the resulting executable to be covered by
  19. the GNU General Public License. This exception does not however
  20. invalidate any other reasons why the executable file might be covered by
  21. the GNU General Public License.
  22. */
  23. #ifndef __TBB_parallel_do_H
  24. #define __TBB_parallel_do_H
  25. #include "task.h"
  26. #include "aligned_space.h"
  27. #include <iterator>
  28. namespace tbb {
  29. //! @cond INTERNAL
  30. namespace internal {
  31. template<typename Body, typename Item> class parallel_do_feeder_impl;
  32. template<typename Body> class do_group_task;
  33. //! Strips its template type argument from 'cv' and '&' qualifiers
  34. template<typename T>
  35. struct strip { typedef T type; };
  36. template<typename T>
  37. struct strip<T&> { typedef T type; };
  38. template<typename T>
  39. struct strip<const T&> { typedef T type; };
  40. template<typename T>
  41. struct strip<volatile T&> { typedef T type; };
  42. template<typename T>
  43. struct strip<const volatile T&> { typedef T type; };
  44. // Most of the compilers remove cv-qualifiers from non-reference function argument types.
  45. // But unfortunately there are those that don't.
  46. template<typename T>
  47. struct strip<const T> { typedef T type; };
  48. template<typename T>
  49. struct strip<volatile T> { typedef T type; };
  50. template<typename T>
  51. struct strip<const volatile T> { typedef T type; };
  52. } // namespace internal
  53. //! @endcond
  54. //! Class the user supplied algorithm body uses to add new tasks
  55. /** \param Item Work item type **/
  56. template<typename Item>
  57. class parallel_do_feeder: internal::no_copy
  58. {
  59. parallel_do_feeder() {}
  60. virtual ~parallel_do_feeder () {}
  61. virtual void internal_add( const Item& item ) = 0;
  62. template<typename Body_, typename Item_> friend class internal::parallel_do_feeder_impl;
  63. public:
  64. //! Add a work item to a running parallel_do.
  65. void add( const Item& item ) {internal_add(item);}
  66. };
  67. //! @cond INTERNAL
  68. namespace internal {
  69. //! For internal use only.
  70. /** Selects one of the two possible forms of function call member operator.
  71. @ingroup algorithms **/
  72. template<class Body, typename Item>
  73. class parallel_do_operator_selector
  74. {
  75. typedef parallel_do_feeder<Item> Feeder;
  76. template<typename A1, typename A2, typename CvItem >
  77. static void internal_call( const Body& obj, A1& arg1, A2&, void (Body::*)(CvItem) const ) {
  78. obj(arg1);
  79. }
  80. template<typename A1, typename A2, typename CvItem >
  81. static void internal_call( const Body& obj, A1& arg1, A2& arg2, void (Body::*)(CvItem, parallel_do_feeder<Item>&) const ) {
  82. obj(arg1, arg2);
  83. }
  84. public:
  85. template<typename A1, typename A2 >
  86. static void call( const Body& obj, A1& arg1, A2& arg2 )
  87. {
  88. internal_call( obj, arg1, arg2, &Body::operator() );
  89. }
  90. };
  91. //! For internal use only.
  92. /** Executes one iteration of a do.
  93. @ingroup algorithms */
  94. template<typename Body, typename Item>
  95. class do_iteration_task: public task
  96. {
  97. typedef parallel_do_feeder_impl<Body, Item> feeder_type;
  98. Item my_value;
  99. feeder_type& my_feeder;
  100. do_iteration_task( const Item& value, feeder_type& feeder ) :
  101. my_value(value), my_feeder(feeder)
  102. {}
  103. /*override*/
  104. task* execute()
  105. {
  106. parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, my_value, my_feeder);
  107. return NULL;
  108. }
  109. template<typename Body_, typename Item_> friend class parallel_do_feeder_impl;
  110. }; // class do_iteration_task
  111. template<typename Iterator, typename Body, typename Item>
  112. class do_iteration_task_iter: public task
  113. {
  114. typedef parallel_do_feeder_impl<Body, Item> feeder_type;
  115. Iterator my_iter;
  116. feeder_type& my_feeder;
  117. do_iteration_task_iter( const Iterator& iter, feeder_type& feeder ) :
  118. my_iter(iter), my_feeder(feeder)
  119. {}
  120. /*override*/
  121. task* execute()
  122. {
  123. parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, *my_iter, my_feeder);
  124. return NULL;
  125. }
  126. template<typename Iterator_, typename Body_, typename Item_> friend class do_group_task_forward;
  127. template<typename Body_, typename Item_> friend class do_group_task_input;
  128. template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
  129. }; // class do_iteration_task_iter
  130. //! For internal use only.
  131. /** Implements new task adding procedure.
  132. @ingroup algorithms **/
  133. template<class Body, typename Item>
  134. class parallel_do_feeder_impl : public parallel_do_feeder<Item>
  135. {
  136. /*override*/
  137. void internal_add( const Item& item )
  138. {
  139. typedef do_iteration_task<Body, Item> iteration_type;
  140. iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this);
  141. t.spawn( t );
  142. }
  143. public:
  144. const Body* my_body;
  145. empty_task* my_barrier;
  146. parallel_do_feeder_impl()
  147. {
  148. my_barrier = new( task::allocate_root() ) empty_task();
  149. __TBB_ASSERT(my_barrier, "root task allocation failed");
  150. }
  151. #if __TBB_TASK_GROUP_CONTEXT
  152. parallel_do_feeder_impl(tbb::task_group_context &context)
  153. {
  154. my_barrier = new( task::allocate_root(context) ) empty_task();
  155. __TBB_ASSERT(my_barrier, "root task allocation failed");
  156. }
  157. #endif
  158. ~parallel_do_feeder_impl()
  159. {
  160. my_barrier->destroy(*my_barrier);
  161. }
  162. }; // class parallel_do_feeder_impl
  163. //! For internal use only
  164. /** Unpacks a block of iterations.
  165. @ingroup algorithms */
  166. template<typename Iterator, typename Body, typename Item>
  167. class do_group_task_forward: public task
  168. {
  169. static const size_t max_arg_size = 4;
  170. typedef parallel_do_feeder_impl<Body, Item> feeder_type;
  171. feeder_type& my_feeder;
  172. Iterator my_first;
  173. size_t my_size;
  174. do_group_task_forward( Iterator first, size_t size, feeder_type& feeder )
  175. : my_feeder(feeder), my_first(first), my_size(size)
  176. {}
  177. /*override*/ task* execute()
  178. {
  179. typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
  180. __TBB_ASSERT( my_size>0, NULL );
  181. task_list list;
  182. task* t;
  183. size_t k=0;
  184. for(;;) {
  185. t = new( allocate_child() ) iteration_type( my_first, my_feeder );
  186. ++my_first;
  187. if( ++k==my_size ) break;
  188. list.push_back(*t);
  189. }
  190. set_ref_count(int(k+1));
  191. spawn(list);
  192. spawn_and_wait_for_all(*t);
  193. return NULL;
  194. }
  195. template<typename Iterator_, typename Body_, typename _Item> friend class do_task_iter;
  196. }; // class do_group_task_forward
  197. template<typename Body, typename Item>
  198. class do_group_task_input: public task
  199. {
  200. static const size_t max_arg_size = 4;
  201. typedef parallel_do_feeder_impl<Body, Item> feeder_type;
  202. feeder_type& my_feeder;
  203. size_t my_size;
  204. aligned_space<Item, max_arg_size> my_arg;
  205. do_group_task_input( feeder_type& feeder )
  206. : my_feeder(feeder), my_size(0)
  207. {}
  208. /*override*/ task* execute()
  209. {
  210. typedef do_iteration_task_iter<Item*, Body, Item> iteration_type;
  211. __TBB_ASSERT( my_size>0, NULL );
  212. task_list list;
  213. task* t;
  214. size_t k=0;
  215. for(;;) {
  216. t = new( allocate_child() ) iteration_type( my_arg.begin() + k, my_feeder );
  217. if( ++k==my_size ) break;
  218. list.push_back(*t);
  219. }
  220. set_ref_count(int(k+1));
  221. spawn(list);
  222. spawn_and_wait_for_all(*t);
  223. return NULL;
  224. }
  225. ~do_group_task_input(){
  226. for( size_t k=0; k<my_size; ++k)
  227. (my_arg.begin() + k)->~Item();
  228. }
  229. template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
  230. }; // class do_group_task_input
  231. //! For internal use only.
  232. /** Gets block of iterations and packages them into a do_group_task.
  233. @ingroup algorithms */
  234. template<typename Iterator, typename Body, typename Item>
  235. class do_task_iter: public task
  236. {
  237. typedef parallel_do_feeder_impl<Body, Item> feeder_type;
  238. public:
  239. do_task_iter( Iterator first, Iterator last , feeder_type& feeder ) :
  240. my_first(first), my_last(last), my_feeder(feeder)
  241. {}
  242. private:
  243. Iterator my_first;
  244. Iterator my_last;
  245. feeder_type& my_feeder;
  246. /* Do not merge run(xxx) and run_xxx() methods. They are separated in order
  247. to make sure that compilers will eliminate unused argument of type xxx
  248. (that is will not put it on stack). The sole purpose of this argument
  249. is overload resolution.
  250. An alternative could be using template functions, but explicit specialization
  251. of member function templates is not supported for non specialized class
  252. templates. Besides template functions would always fall back to the least
  253. efficient variant (the one for input iterators) in case of iterators having
  254. custom tags derived from basic ones. */
  255. /*override*/ task* execute()
  256. {
  257. typedef typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
  258. return run( (iterator_tag*)NULL );
  259. }
  260. /** This is the most restricted variant that operates on input iterators or
  261. iterators with unknown tags (tags not derived from the standard ones). **/
  262. inline task* run( void* ) { return run_for_input_iterator(); }
  263. task* run_for_input_iterator() {
  264. typedef do_group_task_input<Body, Item> block_type;
  265. block_type& t = *new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(my_feeder);
  266. size_t k=0;
  267. while( !(my_first == my_last) ) {
  268. new (t.my_arg.begin() + k) Item(*my_first);
  269. ++my_first;
  270. if( ++k==block_type::max_arg_size ) {
  271. if ( !(my_first == my_last) )
  272. recycle_to_reexecute();
  273. break;
  274. }
  275. }
  276. if( k==0 ) {
  277. destroy(t);
  278. return NULL;
  279. } else {
  280. t.my_size = k;
  281. return &t;
  282. }
  283. }
  284. inline task* run( std::forward_iterator_tag* ) { return run_for_forward_iterator(); }
  285. task* run_for_forward_iterator() {
  286. typedef do_group_task_forward<Iterator, Body, Item> block_type;
  287. Iterator first = my_first;
  288. size_t k=0;
  289. while( !(my_first==my_last) ) {
  290. ++my_first;
  291. if( ++k==block_type::max_arg_size ) {
  292. if ( !(my_first==my_last) )
  293. recycle_to_reexecute();
  294. break;
  295. }
  296. }
  297. return k==0 ? NULL : new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(first, k, my_feeder);
  298. }
  299. inline task* run( std::random_access_iterator_tag* ) { return run_for_random_access_iterator(); }
  300. task* run_for_random_access_iterator() {
  301. typedef do_group_task_forward<Iterator, Body, Item> block_type;
  302. typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
  303. size_t k = static_cast<size_t>(my_last-my_first);
  304. if( k > block_type::max_arg_size ) {
  305. Iterator middle = my_first + k/2;
  306. empty_task& c = *new( allocate_continuation() ) empty_task;
  307. do_task_iter& b = *new( c.allocate_child() ) do_task_iter(middle, my_last, my_feeder);
  308. recycle_as_child_of(c);
  309. my_last = middle;
  310. c.set_ref_count(2);
  311. c.spawn(b);
  312. return this;
  313. }else if( k != 0 ) {
  314. task_list list;
  315. task* t;
  316. size_t k1=0;
  317. for(;;) {
  318. t = new( allocate_child() ) iteration_type(my_first, my_feeder);
  319. ++my_first;
  320. if( ++k1==k ) break;
  321. list.push_back(*t);
  322. }
  323. set_ref_count(int(k+1));
  324. spawn(list);
  325. spawn_and_wait_for_all(*t);
  326. }
  327. return NULL;
  328. }
  329. }; // class do_task_iter
  330. //! For internal use only.
  331. /** Implements parallel iteration over a range.
  332. @ingroup algorithms */
  333. template<typename Iterator, typename Body, typename Item>
  334. void run_parallel_do( Iterator first, Iterator last, const Body& body
  335. #if __TBB_TASK_GROUP_CONTEXT
  336. , task_group_context& context
  337. #endif
  338. )
  339. {
  340. typedef do_task_iter<Iterator, Body, Item> root_iteration_task;
  341. #if __TBB_TASK_GROUP_CONTEXT
  342. parallel_do_feeder_impl<Body, Item> feeder(context);
  343. #else
  344. parallel_do_feeder_impl<Body, Item> feeder;
  345. #endif
  346. feeder.my_body = &body;
  347. root_iteration_task &t = *new( feeder.my_barrier->allocate_child() ) root_iteration_task(first, last, feeder);
  348. feeder.my_barrier->set_ref_count(2);
  349. feeder.my_barrier->spawn_and_wait_for_all(t);
  350. }
  351. //! For internal use only.
  352. /** Detects types of Body's operator function arguments.
  353. @ingroup algorithms **/
  354. template<typename Iterator, typename Body, typename Item>
  355. void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item) const
  356. #if __TBB_TASK_GROUP_CONTEXT
  357. , task_group_context& context
  358. #endif // __TBB_TASK_GROUP_CONTEXT
  359. )
  360. {
  361. run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
  362. #if __TBB_TASK_GROUP_CONTEXT
  363. , context
  364. #endif // __TBB_TASK_GROUP_CONTEXT
  365. );
  366. }
  367. //! For internal use only.
  368. /** Detects types of Body's operator function arguments.
  369. @ingroup algorithms **/
  370. template<typename Iterator, typename Body, typename Item, typename _Item>
  371. void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item, parallel_do_feeder<_Item>&) const
  372. #if __TBB_TASK_GROUP_CONTEXT
  373. , task_group_context& context
  374. #endif // __TBB_TASK_GROUP_CONTEXT
  375. )
  376. {
  377. run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
  378. #if __TBB_TASK_GROUP_CONTEXT
  379. , context
  380. #endif // __TBB_TASK_GROUP_CONTEXT
  381. );
  382. }
  383. } // namespace internal
  384. //! @endcond
/** \page parallel_do_body_req Requirements on parallel_do body
    Class \c Body implementing the concept of parallel_do body must define:
    - \code
        B::operator()(
                cv_item_type item,
                parallel_do_feeder<item_type>& feeder
        ) const

        OR

        B::operator()( cv_item_type& item ) const
      \endcode
      Process item.
      May be invoked concurrently for the same \c this but different \c item.
    - \code item_type( const item_type& ) \endcode
      Copy a work item.
    - \code ~item_type() \endcode
      Destroy a work item.
**/
  400. /** \name parallel_do
  401. See also requirements on \ref parallel_do_body_req "parallel_do Body". **/
  402. //@{
  403. //! Parallel iteration over a range, with optional addition of more work.
  404. /** @ingroup algorithms */
  405. template<typename Iterator, typename Body>
  406. void parallel_do( Iterator first, Iterator last, const Body& body )
  407. {
  408. if ( first == last )
  409. return;
  410. #if __TBB_TASK_GROUP_CONTEXT
  411. task_group_context context;
  412. #endif // __TBB_TASK_GROUP_CONTEXT
  413. internal::select_parallel_do( first, last, body, &Body::operator()
  414. #if __TBB_TASK_GROUP_CONTEXT
  415. , context
  416. #endif // __TBB_TASK_GROUP_CONTEXT
  417. );
  418. }
  419. #if __TBB_TASK_GROUP_CONTEXT
  420. //! Parallel iteration over a range, with optional addition of more work and user-supplied context
  421. /** @ingroup algorithms */
  422. template<typename Iterator, typename Body>
  423. void parallel_do( Iterator first, Iterator last, const Body& body, task_group_context& context )
  424. {
  425. if ( first == last )
  426. return;
  427. internal::select_parallel_do( first, last, body, &Body::operator(), context );
  428. }
  429. #endif // __TBB_TASK_GROUP_CONTEXT
  430. //@}
  431. } // namespace
  432. #endif /* __TBB_parallel_do_H */