You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							354 lines
						
					
					
						
							14 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							354 lines
						
					
					
						
							14 KiB
						
					
					
				| /* | |
|     Copyright 2005-2013 Intel Corporation.  All Rights Reserved. | |
|  | |
|     This file is part of Threading Building Blocks. | |
|  | |
|     Threading Building Blocks is free software; you can redistribute it | |
|     and/or modify it under the terms of the GNU General Public License | |
|     version 2 as published by the Free Software Foundation. | |
|  | |
|     Threading Building Blocks is distributed in the hope that it will be | |
|     useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
|     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
|     GNU General Public License for more details. | |
|  | |
|     You should have received a copy of the GNU General Public License | |
|     along with Threading Building Blocks; if not, write to the Free Software | |
|     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA | |
|  | |
|     As a special exception, you may use this file as part of a free software | |
|     library without restriction.  Specifically, if other files instantiate | |
|     templates or use macros or inline functions from this file, or you compile | |
|     this file and link it with other files to produce an executable, this | |
|     file does not by itself cause the resulting executable to be covered by | |
|     the GNU General Public License.  This exception does not however | |
|     invalidate any other reasons why the executable file might be covered by | |
|     the GNU General Public License. | |
| */ | |
| 
 | |
| #ifndef __TBB_parallel_scan_H | |
| #define __TBB_parallel_scan_H | |
|  | |
| #include "task.h" | |
| #include "aligned_space.h" | |
| #include <new> | |
| #include "partitioner.h" | |
|  | |
| namespace tbb { | |
| 
 | |
| //! Tag type handed to a scan body while the initial (pre-scan) pass is being performed. | |
| /** @ingroup algorithms */ | |
| struct pre_scan_tag { | |
|     //! Always false: this tag never denotes the final scan. | |
|     static bool is_final_scan() { | |
|         return false; | |
|     } | |
| }; | |
| 
 | |
| //! Tag type handed to a scan body while the final scan pass is being performed. | |
| /** @ingroup algorithms */ | |
| struct final_scan_tag { | |
|     //! Always true: this tag denotes the final scan. | |
|     static bool is_final_scan() { | |
|         return true; | |
|     } | |
| }; | |
| 
 | |
| //! @cond INTERNAL | |
| namespace internal { | |
| 
 | |
|     //! Leaf task that runs the final scan over a single subrange. | |
|     /** @ingroup algorithms | |
|         The subrange lives in raw aligned storage. It is constructed only by | |
|         finish_construction(), while the destructor destroys it unconditionally. */ | |
|     template<typename Range, typename Body> | |
|     class final_sum: public task { | |
|     public: | |
|         //! Body split off from the one passed to the constructor. | |
|         Body my_body; | |
|     private: | |
|         //! Storage for the subrange; filled in by finish_construction(). | |
|         aligned_space<Range,1> my_range; | |
|         //! Receives the body state after the final scan; NULL when no result is wanted. | |
|         Body* my_stuff_last; | |
|     public: | |
|         final_sum( Body& body_ ) : | |
|             my_body(body_,split()) | |
|         { | |
|             // The real value is supplied later by finish_construction(). | |
|             poison_pointer(my_stuff_last); | |
|         } | |
|         ~final_sum() { | |
|             Range* stored_range = my_range.begin(); | |
|             stored_range->~Range(); | |
|         } | |
|         //! Copy-construct the subrange in place and record where the result should go. | |
|         void finish_construction( const Range& range_, Body* stuff_last_ ) { | |
|             new( my_range.begin() ) Range(range_); | |
|             my_stuff_last = stuff_last_; | |
|         } | |
|     private: | |
|         /*override*/ task* execute() { | |
|             Range& subrange = *my_range.begin(); | |
|             my_body( subrange, final_scan_tag() ); | |
|             if( my_stuff_last!=NULL ) | |
|                 my_stuff_last->assign(my_body); | |
|             return NULL; | |
|         } | |
|     }; | |
| 
 | |
|     //! Split work to be done in the scan. | |
|     /** @ingroup algorithms | |
|         One node of the tree that start_scan::execute() allocates each time it splits a range. | |
|         When executed with a non-NULL my_body, the node splits my_range once more and passes | |
|         each part to create_child(), which selects either a child sum_node or a final_sum leaf. */ | |
|     template<typename Range, typename Body> | |
|     class sum_node: public task { | |
|         typedef final_sum<Range,Body> final_sum_type; | |
|     public: | |
|         final_sum_type *my_incoming; /* if non-NULL, execute() reverse_joins its body into my_left_sum */ | |
|         final_sum_type *my_body; /* final_sum handed to the left part; execute() resets it to NULL */ | |
|         Body *my_stuff_last; /* forwarded to the split-off part only; the left part always gets NULL */ | |
|     private: | |
|         final_sum_type *my_left_sum; /* assigned through start_scan::my_sum of the left child */ | |
|         sum_node *my_left; /* return slot of the left start_scan; stays NULL if no node is reported */ | |
|         sum_node *my_right; /* return slot of the right start_scan; stays NULL if no node is reported */ | |
|         bool my_left_is_final; /* when true, execute() creates no task for the left part */ | |
|         Range my_range; /* range covered by this node; split again in execute() */ | |
|         sum_node( const Range range_, bool left_is_final_ ) :  | |
|             my_left_sum(NULL),  | |
|             my_left(NULL),  | |
|             my_right(NULL),  | |
|             my_left_is_final(left_is_final_),  | |
|             my_range(range_) | |
|         { | |
|             // Poison fields that will be set by second pass. my_stuff_last is left untouched here. | |
|             poison_pointer(my_body); | |
|             poison_pointer(my_incoming); | |
|         } | |
|         task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) { | |
|             if( !n ) { /* no child node: f itself becomes the child task */ | |
|                 f.recycle_as_child_of( *this ); | |
|                 f.finish_construction( range_, stuff_last_ ); | |
|                 return &f; | |
|             } else { /* child node exists: hand it f, incoming_ and stuff_last_ */ | |
|                 n->my_body = &f; | |
|                 n->my_incoming = incoming_; | |
|                 n->my_stuff_last = stuff_last_; | |
|                 return n; | |
|             } | |
|         } | |
|         /*override*/ task* execute() { | |
|             if( my_body ) { /* my_body still set: perform the split */ | |
|                 if( my_incoming ) | |
|                     my_left_sum->my_body.reverse_join( my_incoming->my_body ); | |
|                 recycle_as_continuation(); | |
|                 sum_node& c = *this; | |
|                 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last); /* part split off my_range, with my_left_sum as its incoming sum */ | |
|                 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL); /* remaining part, skipped when my_left_is_final */ | |
|                 set_ref_count( (a!=NULL)+(b!=NULL) ); /* one reference per child task created above */ | |
|                 my_body = NULL; /* makes a later execute() take the else branch */ | |
|                 if( a ) spawn(*b); | |
|                 else a = b; | |
|                 return a; /* a if it exists (b was spawned), otherwise b */ | |
|             } else { | |
|                 return NULL; | |
|             } | |
|         } | |
|         template<typename Range_,typename Body_,typename Partitioner_> | |
|         friend class start_scan; | |
| 
 | |
|         template<typename Range_,typename Body_> | |
|         friend class finish_scan; | |
|     }; | |
| 
 | |
|     //! Combine partial results | |
|     /** @ingroup algorithms | |
|         Continuation task that start_scan::execute() allocates with allocate_continuation() | |
|         when it splits a range. Its execute() decides whether the sum_node my_result is kept | |
|         (and published through my_return_slot) or destroyed. */ | |
|     template<typename Range, typename Body> | |
|     class finish_scan: public task { | |
|         typedef sum_node<Range,Body> sum_node_type; | |
|         typedef final_sum<Range,Body> final_sum_type; | |
|         final_sum_type** const my_sum; /* may be NULL; otherwise *my_sum is the final_sum that absorbs the left sum */ | |
|         sum_node_type*& my_return_slot; /* set to &my_result when the node is kept */ | |
|     public: | |
|         final_sum_type* my_right_zombie; /* NULL unless the right start_scan replaced its body (see start_scan::execute) */ | |
|         sum_node_type& my_result; /* the sum_node allocated together with this continuation */ | |
| 
 | |
|         /*override*/ task* execute() { | |
|             __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL ); | |
|             if( my_result.my_left ) | |
|                 my_result.my_left_is_final = false; /* so sum_node::execute() creates a task for the left part */ | |
|             if( my_right_zombie && my_sum )  | |
|                 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body); /* fold the left sum into *my_sum */ | |
|             __TBB_ASSERT( !my_return_slot, NULL ); | |
|             if( my_right_zombie || my_result.my_right ) { | |
|                 my_return_slot = &my_result; /* keep the node and publish it */ | |
|             } else { | |
|                 destroy( my_result ); /* neither a right zombie nor a right child: drop the node */ | |
|             } | |
|             if( my_right_zombie && !my_sum && !my_result.my_right ) { | |
|                 destroy(*my_right_zombie); | |
|                 my_right_zombie = NULL; | |
|             } | |
|             return NULL; | |
|         } | |
| 
 | |
|         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :  | |
|             my_sum(sum_), | |
|             my_return_slot(return_slot_),  | |
|             my_right_zombie(NULL), | |
|             my_result(result_) | |
|         { | |
|             __TBB_ASSERT( !my_return_slot, NULL ); | |
|         } | |
|     }; | |
| 
 | |
|     //! Initial task to split the work | |
|     /** @ingroup algorithms | |
|         Task of the first pass. execute() either processes my_range directly or splits it, | |
|         allocating a sum_node and a finish_scan continuation for the split. | |
|         The static run() drives the whole algorithm and is what parallel_scan() calls. */ | |
|     template<typename Range, typename Body, typename Partitioner=simple_partitioner> | |
|     class start_scan: public task { | |
|         typedef sum_node<Range,Body> sum_node_type; | |
|         typedef final_sum<Range,Body> final_sum_type; | |
|         final_sum_type* my_body; /* final_sum whose body is applied to my_range */ | |
|         /** Non-null if caller is requesting total. */ | |
|         final_sum_type** my_sum;  | |
|         sum_node_type** my_return_slot; /* slot handed on to the finish_scan continuation when this task splits */ | |
|         /** Null if computing root. */ | |
|         sum_node_type* my_parent_sum; | |
|         bool my_is_final; /* true: leaves use final_scan_tag; false: pre_scan_tag (only if my_sum is set) */ | |
|         bool my_is_right_child; /* set to true by execute() on the task it splits off */ | |
|         Range my_range; | |
|         typename Partitioner::partition_type my_partition; | |
|         /*override*/ task* execute(); | |
|     public: | |
|         start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) : /* splitting constructor: splits parent_'s range and partition */ | |
|             my_body(parent_.my_body), | |
|             my_sum(parent_.my_sum), | |
|             my_return_slot(&return_slot_), | |
|             my_parent_sum(parent_sum_), | |
|             my_is_final(parent_.my_is_final), | |
|             my_is_right_child(false), | |
|             my_range(parent_.my_range,split()), | |
|             my_partition(parent_.my_partition,split()) | |
|         { | |
|             __TBB_ASSERT( !*my_return_slot, NULL ); | |
|         } | |
| 
 | |
|         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) : /* root constructor used by run() */ | |
|             my_body(&body_), | |
|             my_sum(NULL), | |
|             my_return_slot(&return_slot_), | |
|             my_parent_sum(NULL), | |
|             my_is_final(true), | |
|             my_is_right_child(false), | |
|             my_range(range_), | |
|             my_partition(partitioner_) | |
|         { | |
|             __TBB_ASSERT( !*my_return_slot, NULL ); | |
|         } | |
| 
 | |
|         static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) { | |
|             if( !range_.empty() ) { /* an empty range is a no-op; body_ is left untouched */ | |
|                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type; | |
|                 internal::sum_node<Range,Body>* root = NULL; | |
|                 typedef internal::final_sum<Range,Body> final_sum_type; | |
|                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ ); /* its my_body is split off body_ */ | |
|                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type( | |
|                     /*my_return_slot=*/root, | |
|                     range_, | |
|                     *temp_body, | |
|                     partitioner_ ); | |
|                 task::spawn_root_and_wait( pass1 ); | |
|                 if( root ) { /* first pass reported a sum_node: run it as the second pass */ | |
|                     root->my_body = temp_body; | |
|                     root->my_incoming = NULL; | |
|                     root->my_stuff_last = &body_; | |
|                     task::spawn_root_and_wait( *root ); | |
|                 } else { /* no sum_node reported: copy the body state back directly */ | |
|                     body_.assign(temp_body->my_body); | |
|                     temp_body->finish_construction( range_, NULL ); /* ~final_sum destroys its stored Range, so one must be constructed first */ | |
|                     temp_body->destroy(*temp_body); | |
|                 } | |
|             } | |
|         } | |
|     }; | |
| 
 | |
|     template<typename Range, typename Body, typename Partitioner> | |
|     task* start_scan<Range,Body,Partitioner>::execute() { | |
|         typedef internal::finish_scan<Range,Body> finish_pass1_type; | |
|         finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL; /* right children are always built with a non-NULL parent sum, so p is set whenever my_is_right_child */ | |
|         // Inspecting p->my_result.my_left_sum would ordinarily be a race condition. | |
|         // But we inspect it only if we are not a stolen task, in which case we | |
|         // know that task assigning to p->my_result.my_left_sum has completed. | |
|         bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum); | |
|         if( treat_as_stolen ) { | |
|             // Invocation is for right child that has been really stolen or needs to be virtually stolen | |
|             p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body); /* fresh final_sum split off the current body, also recorded in the continuation */ | |
|             my_is_final = false; | |
|         } | |
|         task* next_task = NULL; | |
|         if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) { /* process my_range here instead of splitting */ | |
|             if( my_is_final ) | |
|                 (my_body->my_body)( my_range, final_scan_tag() ); | |
|             else if( my_sum ) | |
|                 (my_body->my_body)( my_range, pre_scan_tag() ); /* pre-scan runs only when a sum was requested */ | |
|             if( my_sum )  | |
|                 *my_sum = my_body; /* report which final_sum holds this range's sum */ | |
|             __TBB_ASSERT( !*my_return_slot, NULL ); | |
|         } else { | |
|             sum_node_type* result; | |
|             if( my_parent_sum )  | |
|                 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final); | |
|             else | |
|                 result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final); | |
|             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result); | |
|             // Split off right child | |
|             start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result ); | |
|             b.my_is_right_child = true;     | |
|             // Left child is recycling of *this.  Must recycle this before spawning b,  | |
|             // otherwise b might complete and decrement c.ref_count() to zero, which | |
|             // would cause c.execute() to run prematurely. | |
|             recycle_as_child_of(c); | |
|             c.set_ref_count(2); | |
|             c.spawn(b); | |
|             my_sum = &result->my_left_sum; /* from here on *this acts as the left child of result */ | |
|             my_return_slot = &result->my_left; | |
|             my_is_right_child = false; | |
|             next_task = this; /* returning this task makes it the next task to execute */ | |
|             my_parent_sum = result;  | |
|             __TBB_ASSERT( !*my_return_slot, NULL ); | |
|         } | |
|         return next_task; | |
|     }  | |
| } // namespace internal | |
| //! @endcond | |
|  | |
| // Requirements on Range concept are documented in blocked_range.h | |
|  | |
| /** \page parallel_scan_body_req Requirements on parallel_scan body | |
|     Class \c Body implementing the concept of parallel_scan body must define: | |
|     - \code Body::Body( Body& b, split ); \endcode  Splitting constructor. | |
|                                                     Split \c b so that \c this and \c b can accumulate separately | |
|     - \code Body::~Body(); \endcode                 Destructor | |
|     - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode | |
|                                                     Preprocess iterations for range \c r | |
|     - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode  | |
|                                                     Do final processing for iterations of range \c r | |
|     - \code void Body::reverse_join( Body& a ); \endcode | |
|                                                     Merge preprocessing state of \c a into \c this, where \c a was  | |
|                                                     created earlier from \c b by b's splitting constructor | |
|     - \code void Body::assign( Body& b ); \endcode | |
|                                                     Assign state of \c b to \c this | |
| **/ | |
| 
 | |
| /** \name parallel_scan | |
|     See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ | |
| //@{ | |
|  | |
| //! Parallel prefix scan of \c range with \c body, using the default partitioner. | |
| /** @ingroup algorithms **/ | |
| template<typename Range, typename Body> | |
| void parallel_scan( const Range& range, Body& body ) { | |
|     typedef internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER> scan_task_type; | |
|     scan_task_type::run( range, body, __TBB_DEFAULT_PARTITIONER() ); | |
| } | |
| 
 | |
| //! Parallel prefix scan of \c range with \c body, using the given simple_partitioner. | |
| /** @ingroup algorithms **/ | |
| template<typename Range, typename Body> | |
| void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { | |
|     typedef internal::start_scan<Range,Body,simple_partitioner> scan_task_type; | |
|     scan_task_type::run( range, body, partitioner ); | |
| } | |
| 
 | |
| //! Parallel prefix scan of \c range with \c body, using the given auto_partitioner. | |
| /** @ingroup algorithms **/ | |
| template<typename Range, typename Body> | |
| void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { | |
|     typedef internal::start_scan<Range,Body,auto_partitioner> scan_task_type; | |
|     scan_task_type::run( range, body, partitioner ); | |
| } | |
| //@} | |
|  | |
| } // namespace tbb | |
|  | |
| #endif /* __TBB_parallel_scan_H */ | |
| 
 |