Browse Source
			
			
			Merge
			
				
		Merge
	
		
	
			
				Former-commit-id:mainaa69376aa1[formerly8e92e0569d] Former-commit-id:785529f043
				 10 changed files with 250 additions and 2723 deletions
			
			
		- 
					177examples/dtmc/crowds/crowds.pm
 - 
					72examples/pmdp/coin8/coin8.pm
 - 
					2resources/3rdparty/include_cpptemplate.cmake
 - 
					12resources/3rdparty/utf8_v2_3_4/doc/ReleaseNotes
 - 
					1789resources/3rdparty/utf8_v2_3_4/doc/utf8cpp.html
 - 
					34resources/3rdparty/utf8_v2_3_4/source/utf8.h
 - 
					327resources/3rdparty/utf8_v2_3_4/source/utf8/checked.h
 - 
					329resources/3rdparty/utf8_v2_3_4/source/utf8/core.h
 - 
					228resources/3rdparty/utf8_v2_3_4/source/utf8/unchecked.h
 - 
					3src/CMakeLists.txt
 
@ -0,0 +1,177 @@ | 
				
			|||
// CROWDS [Reiter,Rubin] | 
				
			|||
// Vitaly Shmatikov, 2002 | 
				
			|||
 | 
				
			|||
// Note: | 
				
			|||
// Change everything marked CWDSIZ when changing the size of the crowd | 
				
			|||
// Change everything marked CWDMAX when increasing max size of the crowd | 
				
			|||
 | 
				
			|||
dtmc

// Forwarding probability used by good crowd members
const double PF = 0.8;

// Probability that the next crowd member on the path is bad
const double badC = 0.091;
// const double badC = 0.167;

// TotalRuns and CrowdSize carry no value here and have to be supplied
// when the model is built.
const int TotalRuns;    // Total number of protocol runs to analyze
const int CrowdSize;    // CWDSIZ: actual number of good crowd members
const int MaxGood = 20; // CWDMAX: maximum number of good crowd members
				
			|||
 | 
				
			|||
// Process definitions | 
				
			|||
module crowds

	// Phase flags and bookkeeping for one protocol run
	launch:   bool init true;       // true until the model has been started
	new:      bool init false;      // a new protocol instance has to be set up
	runCount: [0..TotalRuns] init TotalRuns;   // protocol instances still to run
	start:    bool init false;      // the sender is about to start the protocol
	run:      bool init false;      // the protocol is running
	lastSeen: [0..MaxGood] init MaxGood;   // last crowd member to touch the message
	good:     bool init false;      // current crowd member is good
	bad:      bool init false;      // current crowd member is bad
	recordLast: bool init false;    // the last-seen crowd member must be recorded
	badObserve: bool init false;    // a bad member observes who sent the message
	deliver:  bool init false;      // the message goes to its destination
	done:     bool init false;      // the protocol instance has finished

	// Attackers' observation counters
	// CWDMAX: one counter per good crowd member
	observe0:  [0..TotalRuns] init 0;
	observe1:  [0..TotalRuns] init 0;
	observe2:  [0..TotalRuns] init 0;
	observe3:  [0..TotalRuns] init 0;
	observe4:  [0..TotalRuns] init 0;
	observe5:  [0..TotalRuns] init 0;
	observe6:  [0..TotalRuns] init 0;
	observe7:  [0..TotalRuns] init 0;
	observe8:  [0..TotalRuns] init 0;
	observe9:  [0..TotalRuns] init 0;
	observe10: [0..TotalRuns] init 0;
	observe11: [0..TotalRuns] init 0;
	observe12: [0..TotalRuns] init 0;
	observe13: [0..TotalRuns] init 0;
	observe14: [0..TotalRuns] init 0;
	observe15: [0..TotalRuns] init 0;
	observe16: [0..TotalRuns] init 0;
	observe17: [0..TotalRuns] init 0;
	observe18: [0..TotalRuns] init 0;
	observe19: [0..TotalRuns] init 0;

	// Start modeling: request the first protocol instance
	[] launch -> (launch'=false) & (new'=true) & (runCount'=TotalRuns);

	// Set up a new protocol instance while runs remain
	[] new & runCount>0 -> (new'=false) & (runCount'=runCount-1) & (start'=true);

	// SENDER
	// Start the protocol; the message is first seen at member 0
	[] start -> (start'=false) & (lastSeen'=0) & (run'=true) & (deliver'=false);

	// CROWD MEMBERS
	// The next member is bad with probability badC, good otherwise
	[] run & !deliver & !good & !bad ->
	        1-badC : (good'=true) & (recordLast'=true) & (run'=false)
	      + badC   : (bad'=true)  & (badObserve'=true) & (run'=false);

	// GOOD MEMBERS
	// Forward with probability PF, otherwise deliver
	[] run & good & !deliver ->
	        PF   : (good'=false)
	      + 1-PF : (deliver'=true);

	// Record which good member touched the message last; every good member
	// is equally likely. One command exists per supported CrowdSize value
	// (2, 4, 5, 10, 15, 20).
	//    Note: this is backward with respect to the real protocol, where
	//          each honest forwarder randomly picks the next forwarder.
	//          Here the identity of an honest forwarder is drawn at random
	//          *after* it has forwarded the message.
	[] CrowdSize=2 & recordLast ->
	        1/2 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/2 : (recordLast'=false) & (run'=true) & (lastSeen'=1);

	[] CrowdSize=4 & recordLast ->
	        1/4 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/4 : (recordLast'=false) & (run'=true) & (lastSeen'=1)
	      + 1/4 : (recordLast'=false) & (run'=true) & (lastSeen'=2)
	      + 1/4 : (recordLast'=false) & (run'=true) & (lastSeen'=3);

	[] CrowdSize=5 & recordLast ->
	        1/5 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/5 : (recordLast'=false) & (run'=true) & (lastSeen'=1)
	      + 1/5 : (recordLast'=false) & (run'=true) & (lastSeen'=2)
	      + 1/5 : (recordLast'=false) & (run'=true) & (lastSeen'=3)
	      + 1/5 : (recordLast'=false) & (run'=true) & (lastSeen'=4);

	[] CrowdSize=10 & recordLast ->
	        1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=1)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=2)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=3)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=4)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=5)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=6)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=7)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=8)
	      + 1/10 : (recordLast'=false) & (run'=true) & (lastSeen'=9);

	[] CrowdSize=15 & recordLast ->
	        1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=1)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=2)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=3)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=4)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=5)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=6)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=7)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=8)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=9)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=10)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=11)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=12)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=13)
	      + 1/15 : (recordLast'=false) & (run'=true) & (lastSeen'=14);

	[] CrowdSize=20 & recordLast ->
	        1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=0)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=1)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=2)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=3)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=4)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=5)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=6)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=7)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=8)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=9)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=10)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=11)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=12)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=13)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=14)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=15)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=16)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=17)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=18)
	      + 1/20 : (recordLast'=false) & (run'=true) & (lastSeen'=19);

	// BAD MEMBERS
	// Count an observation of the member the message came from, then deliver
	// CWDMAX: one rule per good crowd member
	[] badObserve & lastSeen=0  & observe0 <TotalRuns -> (badObserve'=false) & (observe0' =observe0 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=1  & observe1 <TotalRuns -> (badObserve'=false) & (observe1' =observe1 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=2  & observe2 <TotalRuns -> (badObserve'=false) & (observe2' =observe2 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=3  & observe3 <TotalRuns -> (badObserve'=false) & (observe3' =observe3 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=4  & observe4 <TotalRuns -> (badObserve'=false) & (observe4' =observe4 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=5  & observe5 <TotalRuns -> (badObserve'=false) & (observe5' =observe5 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=6  & observe6 <TotalRuns -> (badObserve'=false) & (observe6' =observe6 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=7  & observe7 <TotalRuns -> (badObserve'=false) & (observe7' =observe7 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=8  & observe8 <TotalRuns -> (badObserve'=false) & (observe8' =observe8 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=9  & observe9 <TotalRuns -> (badObserve'=false) & (observe9' =observe9 +1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=10 & observe10<TotalRuns -> (badObserve'=false) & (observe10'=observe10+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=11 & observe11<TotalRuns -> (badObserve'=false) & (observe11'=observe11+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=12 & observe12<TotalRuns -> (badObserve'=false) & (observe12'=observe12+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=13 & observe13<TotalRuns -> (badObserve'=false) & (observe13'=observe13+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=14 & observe14<TotalRuns -> (badObserve'=false) & (observe14'=observe14+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=15 & observe15<TotalRuns -> (badObserve'=false) & (observe15'=observe15+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=16 & observe16<TotalRuns -> (badObserve'=false) & (observe16'=observe16+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=17 & observe17<TotalRuns -> (badObserve'=false) & (observe17'=observe17+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=18 & observe18<TotalRuns -> (badObserve'=false) & (observe18'=observe18+1) & (deliver'=true) & (run'=true);
	[] badObserve & lastSeen=19 & observe19<TotalRuns -> (badObserve'=false) & (observe19'=observe19+1) & (deliver'=true) & (run'=true);

	// RECIPIENT
	// Delivery to the destination ends the current instance
	[] run & deliver -> (done'=true) & (deliver'=false) & (run'=false) & (good'=false) & (bad'=false);
	// Request a new instance and reset lastSeen to its initial value
	[] done -> (done'=false) & (new'=true) & (run'=false) & (lastSeen'=MaxGood);

endmodule
				
			|||
@ -0,0 +1,72 @@ | 
				
			|||
//Randomised Consensus Protocol | 
				
			|||
 | 
				
			|||
mdp

// Coin bias per process: pI is the probability that process I's local
// flip yields 0. None of them is given a value here.
const double p1; // in [0.2 , 0.8]
const double p2; // in [0.2 , 0.8]
const double p3; // in [0.2 , 0.8]
const double p4; // in [0.2 , 0.8]
const double p5;
const double p6;
const double p7;
const double p8;

const int N = 8;
const int K;                        // left undefined here
const int range = 2*(K+1)*N;        // upper bound of the shared counter
const int counter_init = (K+1)*N;   // starting value of the shared counter
const int left = N;                 // counter <= left  : decide tails
const int right = 2*(K+1)*N - N;    // counter >= right : decide heads

// Shared coin: a counter that every process reads and updates
global counter : [0..range] init counter_init;
				
			|||
 | 
				
			|||
module process1

	// Program counter:
	//   0 - flip, 1 - write, 2 - check, 3 - finished
	pc1 : [0..3];

	// Local coin: 0 is tails, 1 is heads
	coin1 : [0..1];

	// Flip the local coin: 0 with probability p1, 1 otherwise
	[] (pc1=0) ->
	        p1     : (pc1'=1) & (coin1'=0)
	      + 1 - p1 : (pc1'=1) & (coin1'=1);

	// Write tails: decrement the shared counter (coin reset to add regularity)
	[] (pc1=1) & (coin1=0) & (counter>0) -> (pc1'=2) & (coin1'=0) & (counter'=counter-1);

	// Write heads: increment the shared counter (coin reset to add regularity)
	[] (pc1=1) & (coin1=1) & (counter<range) -> (pc1'=2) & (coin1'=0) & (counter'=counter+1);

	// Check the shared counter against the two thresholds
	// ... at or below left: decide tails
	[] (pc1=2) & (counter<=left) -> (pc1'=3) & (coin1'=0);
	// ... at or above right: decide heads
	[] (pc1=2) & (counter>=right) -> (pc1'=3) & (coin1'=1);
	// ... strictly in between: flip again
	[] (pc1=2) & (counter>left) & (counter<right) -> (pc1'=0);

	// Self-loop once finished; the shared action name makes all processes
	// take this step together
	[done] (pc1=3) -> (pc1'=3);

endmodule
				
			|||
 | 
				
			|||
// Processes 2..8 are copies of process1 with their own program counter,
// local coin and coin bias substituted in.
module process2 = process1 [ pc1=pc2, coin1=coin2, p1=p2 ] endmodule
module process3 = process1 [ pc1=pc3, coin1=coin3, p1=p3 ] endmodule
module process4 = process1 [ pc1=pc4, coin1=coin4, p1=p4 ] endmodule
module process5 = process1 [ pc1=pc5, coin1=coin5, p1=p5 ] endmodule
module process6 = process1 [ pc1=pc6, coin1=coin6, p1=p6 ] endmodule
module process7 = process1 [ pc1=pc7, coin1=coin7, p1=p7 ] endmodule
module process8 = process1 [ pc1=pc8, coin1=coin8, p1=p8 ] endmodule
				
			|||
 | 
				
			|||
// Every process has reached program counter 3
label "finished" =
	pc1=3 & pc2=3 & pc3=3 & pc4=3 & pc5=3 & pc6=3 & pc7=3 & pc8=3;

// Every local coin shows 1
label "all_coins_equal_1" =
	coin1=1 & coin2=1 & coin3=1 & coin4=1 & coin5=1 & coin6=1 & coin7=1 & coin8=1;

// Every local coin shows 0
label "all_coins_equal_0" =
	coin1=0 & coin2=0 & coin3=0 & coin4=0 & coin5=0 & coin6=0 & coin7=0 & coin8=0;

// All local coins show the same value (pairwise chain of equalities)
label "agree" =
	coin1=coin2 & coin2=coin3 & coin3=coin4 & coin4=coin5 & coin5=coin6 & coin6=coin7 & coin7=coin8;
				
			|||
 | 
				
			|||
// Reward structure "steps": a reward of 1 in every state
rewards "steps"
	true : 1;
endrewards
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
@ -1,12 +0,0 @@ | 
				
			|||
utf8 cpp library | 
				
			|||
Release 2.3.4 | 
				
			|||
 | 
				
			|||
A minor bug fix release. Thanks to all who reported bugs.  | 
				
			|||
 | 
				
			|||
Note: Version 2.3.3 contained a regression, and therefore was removed. | 
				
			|||
 | 
				
			|||
Changes from version 2.3.2 | 
				
			|||
- Bug fix [39]: checked.h Line 273 and unchecked.h Line 182 have an extra ';' | 
				
			|||
- Bug fix [36]: replace_invalid() only works with back_inserter | 
				
			|||
 | 
				
			|||
Files included in the release: utf8.h, core.h, checked.h, unchecked.h, utf8cpp.html, ReleaseNotes | 
				
			|||
						
							
						
						
							1789
	
						
						resources/3rdparty/utf8_v2_3_4/doc/utf8cpp.html
						
							File diff suppressed because it is too large
							
							
								
									View File
								
							
						
					
				File diff suppressed because it is too large
							
							
								
									View File
								
							
						@ -1,34 +0,0 @@ | 
				
			|||
// Copyright 2006 Nemanja Trifunovic | 
				
			|||
 | 
				
			|||
/* | 
				
			|||
Permission is hereby granted, free of charge, to any person or organization | 
				
			|||
obtaining a copy of the software and accompanying documentation covered by | 
				
			|||
this license (the "Software") to use, reproduce, display, distribute, | 
				
			|||
execute, and transmit the Software, and to prepare derivative works of the | 
				
			|||
Software, and to permit third-parties to whom the Software is furnished to | 
				
			|||
do so, all subject to the following: | 
				
			|||
 | 
				
			|||
The copyright notices in the Software and this entire statement, including | 
				
			|||
the above license grant, this restriction and the following disclaimer, | 
				
			|||
must be included in all copies of the Software, in whole or in part, and | 
				
			|||
all derivative works of the Software, unless such copies or derivative | 
				
			|||
works are solely in the form of machine-executable object code generated by | 
				
			|||
a source language processor. | 
				
			|||
 | 
				
			|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
				
			|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
				
			|||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | 
				
			|||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | 
				
			|||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | 
				
			|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
				
			|||
DEALINGS IN THE SOFTWARE. | 
				
			|||
*/ | 
				
			|||
 | 
				
			|||
 | 
				
			|||
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
 | 
				
			|||
#include "utf8/checked.h" | 
				
			|||
#include "utf8/unchecked.h" | 
				
			|||
 | 
				
			|||
#endif // header guard | 
				
			|||
@ -1,327 +0,0 @@ | 
				
			|||
// Copyright 2006 Nemanja Trifunovic | 
				
			|||
 | 
				
			|||
/* | 
				
			|||
Permission is hereby granted, free of charge, to any person or organization | 
				
			|||
obtaining a copy of the software and accompanying documentation covered by | 
				
			|||
this license (the "Software") to use, reproduce, display, distribute, | 
				
			|||
execute, and transmit the Software, and to prepare derivative works of the | 
				
			|||
Software, and to permit third-parties to whom the Software is furnished to | 
				
			|||
do so, all subject to the following: | 
				
			|||
 | 
				
			|||
The copyright notices in the Software and this entire statement, including | 
				
			|||
the above license grant, this restriction and the following disclaimer, | 
				
			|||
must be included in all copies of the Software, in whole or in part, and | 
				
			|||
all derivative works of the Software, unless such copies or derivative | 
				
			|||
works are solely in the form of machine-executable object code generated by | 
				
			|||
a source language processor. | 
				
			|||
 | 
				
			|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
				
			|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
				
			|||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | 
				
			|||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | 
				
			|||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | 
				
			|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
				
			|||
DEALINGS IN THE SOFTWARE. | 
				
			|||
*/ | 
				
			|||
 | 
				
			|||
 | 
				
			|||
#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
 | 
				
			|||
#include "core.h" | 
				
			|||
#include <stdexcept> | 
				
			|||
 | 
				
			|||
namespace utf8 | 
				
			|||
{ | 
				
			|||
    // Common base class of the exceptions this library may throw; it adds
    // nothing to ::std::exception beyond a distinct type to catch.
    class exception : public ::std::exception
    {
    };
				
			|||
 | 
				
			|||
    // Exceptions that may be thrown from the library functions. | 
				
			|||
    // Reports an invalid code point and remembers the offending value.
    class invalid_code_point : public exception
    {
    public:
        invalid_code_point(uint32_t code_point_value) : cp(code_point_value) {}

        virtual const char* what() const throw()
        {
            return "Invalid code point";
        }

        // The value this exception was constructed with.
        uint32_t code_point() const
        {
            return cp;
        }

    private:
        uint32_t cp;
    };
				
			|||
 | 
				
			|||
    // Reports malformed UTF-8 input and remembers one octet of it.
    class invalid_utf8 : public exception
    {
    public:
        invalid_utf8 (uint8_t octet) : u8(octet) {}

        virtual const char* what() const throw()
        {
            return "Invalid UTF-8";
        }

        // The octet this exception was constructed with.
        uint8_t utf8_octet() const
        {
            return u8;
        }

    private:
        uint8_t u8;
    };
				
			|||
 | 
				
			|||
    // Reports malformed UTF-16 input and remembers one 16-bit unit of it.
    class invalid_utf16 : public exception
    {
    public:
        invalid_utf16 (uint16_t word) : u16(word) {}

        virtual const char* what() const throw()
        {
            return "Invalid UTF-16";
        }

        // The 16-bit unit this exception was constructed with.
        uint16_t utf16_word() const
        {
            return u16;
        }

    private:
        uint16_t u16;
    };
				
			|||
 | 
				
			|||
    // Reports that a sequence had too few octets left to be processed.
    class not_enough_room : public exception
    {
    public:
        virtual const char* what() const throw()
        {
            return "Not enough space";
        }
    };
				
			|||
 | 
				
			|||
    /// The library API - functions intended to be called by the users | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    octet_iterator append(uint32_t cp, octet_iterator result) | 
				
			|||
    { | 
				
			|||
        if (!utf8::internal::is_code_point_valid(cp)) | 
				
			|||
            throw invalid_code_point(cp); | 
				
			|||
 | 
				
			|||
        if (cp < 0x80)                        // one octet | 
				
			|||
            *(result++) = static_cast<uint8_t>(cp); | 
				
			|||
        else if (cp < 0x800) {                // two octets | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0); | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); | 
				
			|||
        } | 
				
			|||
        else if (cp < 0x10000) {              // three octets | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0); | 
				
			|||
            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80); | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); | 
				
			|||
        } | 
				
			|||
        else {                                // four octets | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0); | 
				
			|||
            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80); | 
				
			|||
            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80); | 
				
			|||
            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); | 
				
			|||
        } | 
				
			|||
        return result; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator, typename output_iterator> | 
				
			|||
    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) | 
				
			|||
    { | 
				
			|||
        while (start != end) { | 
				
			|||
            octet_iterator sequence_start = start; | 
				
			|||
            internal::utf_error err_code = utf8::internal::validate_next(start, end); | 
				
			|||
            switch (err_code) { | 
				
			|||
                case internal::UTF8_OK : | 
				
			|||
                    for (octet_iterator it = sequence_start; it != start; ++it) | 
				
			|||
                        *out++ = *it; | 
				
			|||
                    break; | 
				
			|||
                case internal::NOT_ENOUGH_ROOM: | 
				
			|||
                    throw not_enough_room(); | 
				
			|||
                case internal::INVALID_LEAD: | 
				
			|||
                    out = utf8::append (replacement, out); | 
				
			|||
                    ++start; | 
				
			|||
                    break; | 
				
			|||
                case internal::INCOMPLETE_SEQUENCE: | 
				
			|||
                case internal::OVERLONG_SEQUENCE: | 
				
			|||
                case internal::INVALID_CODE_POINT: | 
				
			|||
                    out = utf8::append (replacement, out); | 
				
			|||
                    ++start; | 
				
			|||
                    // just one replacement mark for the sequence | 
				
			|||
                    while (start != end && utf8::internal::is_trail(*start)) | 
				
			|||
                        ++start; | 
				
			|||
                    break; | 
				
			|||
            } | 
				
			|||
        } | 
				
			|||
        return out; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator, typename output_iterator> | 
				
			|||
    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) | 
				
			|||
    { | 
				
			|||
        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); | 
				
			|||
        return utf8::replace_invalid(start, end, out, replacement_marker); | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    uint32_t next(octet_iterator& it, octet_iterator end) | 
				
			|||
    { | 
				
			|||
        uint32_t cp = 0; | 
				
			|||
        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); | 
				
			|||
        switch (err_code) { | 
				
			|||
            case internal::UTF8_OK : | 
				
			|||
                break; | 
				
			|||
            case internal::NOT_ENOUGH_ROOM : | 
				
			|||
                throw not_enough_room(); | 
				
			|||
            case internal::INVALID_LEAD : | 
				
			|||
            case internal::INCOMPLETE_SEQUENCE : | 
				
			|||
            case internal::OVERLONG_SEQUENCE : | 
				
			|||
                throw invalid_utf8(*it); | 
				
			|||
            case internal::INVALID_CODE_POINT : | 
				
			|||
                throw invalid_code_point(cp); | 
				
			|||
        } | 
				
			|||
        return cp; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    uint32_t peek_next(octet_iterator it, octet_iterator end) | 
				
			|||
    { | 
				
			|||
        return utf8::next(it, end); | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    uint32_t prior(octet_iterator& it, octet_iterator start) | 
				
			|||
    { | 
				
			|||
        // can't do much if it == start | 
				
			|||
        if (it == start) | 
				
			|||
            throw not_enough_room(); | 
				
			|||
 | 
				
			|||
        octet_iterator end = it; | 
				
			|||
        // Go back until we hit either a lead octet or start | 
				
			|||
        while (utf8::internal::is_trail(*(--it))) | 
				
			|||
            if (it == start) | 
				
			|||
                throw invalid_utf8(*it); // error - no lead byte in the sequence | 
				
			|||
        return utf8::peek_next(it, end); | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    /// Deprecated in versions that include "prior" | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    uint32_t previous(octet_iterator& it, octet_iterator pass_start) | 
				
			|||
    { | 
				
			|||
        octet_iterator end = it; | 
				
			|||
        while (utf8::internal::is_trail(*(--it))) | 
				
			|||
            if (it == pass_start) | 
				
			|||
                throw invalid_utf8(*it); // error - no lead byte in the sequence | 
				
			|||
        octet_iterator temp = it; | 
				
			|||
        return utf8::next(temp, end); | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator, typename distance_type> | 
				
			|||
    void advance (octet_iterator& it, distance_type n, octet_iterator end) | 
				
			|||
    { | 
				
			|||
        for (distance_type i = 0; i < n; ++i) | 
				
			|||
            utf8::next(it, end); | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    typename std::iterator_traits<octet_iterator>::difference_type | 
				
			|||
    distance (octet_iterator first, octet_iterator last) | 
				
			|||
    { | 
				
			|||
        typename std::iterator_traits<octet_iterator>::difference_type dist; | 
				
			|||
        for (dist = 0; first < last; ++dist) | 
				
			|||
            utf8::next(first, last); | 
				
			|||
        return dist; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename u16bit_iterator, typename octet_iterator> | 
				
			|||
    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) | 
				
			|||
    { | 
				
			|||
        while (start != end) { | 
				
			|||
            uint32_t cp = utf8::internal::mask16(*start++); | 
				
			|||
            // Take care of surrogate pairs first | 
				
			|||
            if (utf8::internal::is_lead_surrogate(cp)) { | 
				
			|||
                if (start != end) { | 
				
			|||
                    uint32_t trail_surrogate = utf8::internal::mask16(*start++); | 
				
			|||
                    if (utf8::internal::is_trail_surrogate(trail_surrogate)) | 
				
			|||
                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; | 
				
			|||
                    else | 
				
			|||
                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); | 
				
			|||
                } | 
				
			|||
                else | 
				
			|||
                    throw invalid_utf16(static_cast<uint16_t>(cp)); | 
				
			|||
 | 
				
			|||
            } | 
				
			|||
            // Lone trail surrogate | 
				
			|||
            else if (utf8::internal::is_trail_surrogate(cp)) | 
				
			|||
                throw invalid_utf16(static_cast<uint16_t>(cp)); | 
				
			|||
 | 
				
			|||
            result = utf8::append(cp, result); | 
				
			|||
        } | 
				
			|||
        return result; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename u16bit_iterator, typename octet_iterator> | 
				
			|||
    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) | 
				
			|||
    { | 
				
			|||
        while (start != end) { | 
				
			|||
            uint32_t cp = utf8::next(start, end); | 
				
			|||
            if (cp > 0xffff) { //make a surrogate pair | 
				
			|||
                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET); | 
				
			|||
                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); | 
				
			|||
            } | 
				
			|||
            else | 
				
			|||
                *result++ = static_cast<uint16_t>(cp); | 
				
			|||
        } | 
				
			|||
        return result; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator, typename u32bit_iterator> | 
				
			|||
    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) | 
				
			|||
    { | 
				
			|||
        while (start != end) | 
				
			|||
            result = utf8::append(*(start++), result); | 
				
			|||
 | 
				
			|||
        return result; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    template <typename octet_iterator, typename u32bit_iterator> | 
				
			|||
    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) | 
				
			|||
    { | 
				
			|||
        while (start != end) | 
				
			|||
            (*result++) = utf8::next(start, end); | 
				
			|||
 | 
				
			|||
        return result; | 
				
			|||
    } | 
				
			|||
 | 
				
			|||
    // The iterator class | 
				
			|||
    template <typename octet_iterator> | 
				
			|||
    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { | 
				
			|||
      octet_iterator it; | 
				
			|||
      octet_iterator range_start; | 
				
			|||
      octet_iterator range_end; | 
				
			|||
      public: | 
				
			|||
      iterator () {} | 
				
			|||
      explicit iterator (const octet_iterator& octet_it, | 
				
			|||
                         const octet_iterator& range_start, | 
				
			|||
                         const octet_iterator& range_end) : | 
				
			|||
               it(octet_it), range_start(range_start), range_end(range_end) | 
				
			|||
      { | 
				
			|||
          if (it < range_start || it > range_end) | 
				
			|||
              throw std::out_of_range("Invalid utf-8 iterator position"); | 
				
			|||
      } | 
				
			|||
      // the default "big three" are OK | 
				
			|||
      octet_iterator base () const { return it; } | 
				
			|||
      uint32_t operator * () const | 
				
			|||
      { | 
				
			|||
          octet_iterator temp = it; | 
				
			|||
          return utf8::next(temp, range_end); | 
				
			|||
      } | 
				
			|||
      bool operator == (const iterator& rhs) const | 
				
			|||
      { | 
				
			|||
          if (range_start != rhs.range_start || range_end != rhs.range_end) | 
				
			|||
              throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); | 
				
			|||
          return (it == rhs.it); | 
				
			|||
      } | 
				
			|||
      bool operator != (const iterator& rhs) const | 
				
			|||
      { | 
				
			|||
          return !(operator == (rhs)); | 
				
			|||
      } | 
				
			|||
      iterator& operator ++ () | 
				
			|||
      { | 
				
			|||
          utf8::next(it, range_end); | 
				
			|||
          return *this; | 
				
			|||
      } | 
				
			|||
      iterator operator ++ (int) | 
				
			|||
      { | 
				
			|||
          iterator temp = *this; | 
				
			|||
          utf8::next(it, range_end); | 
				
			|||
          return temp; | 
				
			|||
      } | 
				
			|||
      iterator& operator -- () | 
				
			|||
      { | 
				
			|||
          utf8::prior(it, range_start); | 
				
			|||
          return *this; | 
				
			|||
      } | 
				
			|||
      iterator operator -- (int) | 
				
			|||
      { | 
				
			|||
          iterator temp = *this; | 
				
			|||
          utf8::prior(it, range_start); | 
				
			|||
          return temp; | 
				
			|||
      } | 
				
			|||
    }; // class iterator | 
				
			|||
 | 
				
			|||
} // namespace utf8 | 
				
			|||
 | 
				
			|||
#endif //header guard | 
				
			|||
 | 
				
			|||
 | 
				
			|||
@ -1,329 +0,0 @@ | 
				
			|||
// Copyright 2006 Nemanja Trifunovic | 
				
			|||
 | 
				
			|||
/* | 
				
			|||
Permission is hereby granted, free of charge, to any person or organization | 
				
			|||
obtaining a copy of the software and accompanying documentation covered by | 
				
			|||
this license (the "Software") to use, reproduce, display, distribute, | 
				
			|||
execute, and transmit the Software, and to prepare derivative works of the | 
				
			|||
Software, and to permit third-parties to whom the Software is furnished to | 
				
			|||
do so, all subject to the following: | 
				
			|||
 | 
				
			|||
The copyright notices in the Software and this entire statement, including | 
				
			|||
the above license grant, this restriction and the following disclaimer, | 
				
			|||
must be included in all copies of the Software, in whole or in part, and | 
				
			|||
all derivative works of the Software, unless such copies or derivative | 
				
			|||
works are solely in the form of machine-executable object code generated by | 
				
			|||
a source language processor. | 
				
			|||
 | 
				
			|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
				
			|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
				
			|||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | 
				
			|||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | 
				
			|||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | 
				
			|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
				
			|||
DEALINGS IN THE SOFTWARE. | 
				
			|||
*/ | 
				
			|||
 | 
				
			|||
 | 
				
			|||
#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
 | 
				
			|||
#include <iterator> | 
				
			|||
 | 
				
			|||
namespace utf8
{
    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
    // You may need to change them to match your system.
    // These typedefs have the same names as ones from cstdint, or boost/cstdint
    typedef unsigned char   uint8_t;
    typedef unsigned short  uint16_t;
    typedef unsigned int    uint32_t;

// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
    // Unicode constants
    // Leading (high) surrogates: 0xd800 - 0xdbff
    // Trailing (low) surrogates: 0xdc00 - 0xdfff
    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
    // Added to (cp >> 10) to obtain the lead surrogate of a code point above 0xffff.
    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
    // Added to (lead << 10) + trail to obtain the code point of a surrogate pair.
    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;

    // Maximum valid value for a Unicode code point
    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;

    /// Returns the low 8 bits of `oc` as an unsigned octet, so that a signed
    /// char holding a value >= 0x80 is not seen as a negative number.
    template<typename octet_type>
    inline uint8_t mask8(octet_type oc)
    {
        return static_cast<uint8_t>(0xff & oc);
    }

    /// Returns the low 16 bits of `oc` as an unsigned 16-bit unit.
    template<typename u16_type>
    inline uint16_t mask16(u16_type oc)
    {
        return static_cast<uint16_t>(0xffff & oc);
    }

    /// True when `oc` has the bit pattern 10xxxxxx, i.e. is a UTF-8 trail octet.
    template<typename octet_type>
    inline bool is_trail(octet_type oc)
    {
        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
    }

    /// True when `cp` lies in the lead (high) surrogate range 0xd800 - 0xdbff.
    template <typename u16>
    inline bool is_lead_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
    }

    /// True when `cp` lies in the trail (low) surrogate range 0xdc00 - 0xdfff.
    template <typename u16>
    inline bool is_trail_surrogate(u16 cp)
    {
        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    /// True when `cp` lies anywhere in the surrogate range 0xd800 - 0xdfff.
    template <typename u16>
    inline bool is_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    /// True when `cp` does not exceed CODE_POINT_MAX and is not a surrogate.
    template <typename u32>
    inline bool is_code_point_valid(u32 cp)
    {
        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
    }

    /// Returns the length (1 - 4) of the UTF-8 sequence announced by the lead
    /// octet at `lead_it`, or 0 when that octet cannot start a sequence.
    /// Dereferences `lead_it`, so the caller must make sure it is readable.
    template <typename octet_iterator>
    inline typename std::iterator_traits<octet_iterator>::difference_type
    sequence_length(octet_iterator lead_it)
    {
        uint8_t lead = utf8::internal::mask8(*lead_it);
        if (lead < 0x80)                 // 0xxxxxxx
            return 1;
        else if ((lead >> 5) == 0x6)     // 110xxxxx
            return 2;
        else if ((lead >> 4) == 0xe)     // 1110xxxx
            return 3;
        else if ((lead >> 3) == 0x1e)    // 11110xxx
            return 4;
        else
            return 0;
    }

    /// True when `cp` was decoded from more octets (`length`) than its value
    /// requires. Only the 1-, 2- and 3-octet value ranges are checked.
    template <typename octet_difference_type>
    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
    {
        if (cp < 0x80) {
            if (length != 1)
                return true;
        }
        else if (cp < 0x800) {
            if (length != 2)
                return true;
        }
        else if (cp < 0x10000) {
            if (length != 3)
                return true;
        }

        return false;
    }

    /// Result codes produced by the decoding helpers below.
    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};

    /// Helper for get_sequence_x
    /// Advances `it` by one octet. Returns NOT_ENOUGH_ROOM when that reaches
    /// `end`, INCOMPLETE_SEQUENCE when the new octet is not a trail octet,
    /// and UTF8_OK otherwise.
    template <typename octet_iterator>
    utf_error increase_safely(octet_iterator& it, octet_iterator end)
    {
        if (++it == end)
            return NOT_ENOUGH_ROOM;

        if (!utf8::internal::is_trail(*it))
            return INCOMPLETE_SEQUENCE;

        return UTF8_OK;
    }

    // Steps to the next trail octet and leaves the enclosing function with the
    // error code if that step fails. Only defined for the get_sequence_x helpers.
    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}

    /// get_sequence_x functions decode utf-8 sequences of the length x
    /// On success `it` is left on the LAST octet of the sequence (not past it)
    /// and `code_point` holds the decoded value. No overlong / validity checks
    /// are made here; validate_next performs them.
    template <typename octet_iterator>
    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 5 payload bits from the lead octet, 6 from the trail octet
        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 4 payload bits from the lead octet, 6 from each trail octet
        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
           return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 3 payload bits from the lead octet, 6 from each trail octet
        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }

    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR

    /// Decodes and validates the UTF-8 sequence starting at `it`.
    ///
    /// On success returns UTF8_OK, stores the code point in `code_point` and
    /// moves `it` past the sequence. On failure returns the error code,
    /// leaves `code_point` untouched and restores `it` to where it started.
    /// An empty range (`it == end`) yields NOT_ENOUGH_ROOM.
    template <typename octet_iterator>
    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        // Nothing to decode. This has to be checked first: sequence_length()
        // below dereferences `it`, which must not happen for the end iterator.
        if (it == end)
            return NOT_ENOUGH_ROOM;

        // Save the original value of it so we can go back in case of failure
        // Of course, it does not make much sense with i.e. stream iterators
        octet_iterator original_it = it;

        uint32_t cp = 0;
        // Determine the sequence length based on the lead octet
        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
        const octet_difference_type length = utf8::internal::sequence_length(it);

        // Get trail octets and calculate the code point
        utf_error err = UTF8_OK;
        switch (length) {
            case 0:
                return INVALID_LEAD;
            case 1:
                err = utf8::internal::get_sequence_1(it, end, cp);
                break;
            case 2:
                err = utf8::internal::get_sequence_2(it, end, cp);
                break;
            case 3:
                err = utf8::internal::get_sequence_3(it, end, cp);
                break;
            case 4:
                err = utf8::internal::get_sequence_4(it, end, cp);
                break;
        }

        if (err == UTF8_OK) {
            // Decoding succeeded. Now, security checks...
            if (utf8::internal::is_code_point_valid(cp)) {
                if (!utf8::internal::is_overlong_sequence(cp, length)){
                    // Passed! Return here.
                    code_point = cp;
                    // get_sequence_x stops on the last octet; step past it.
                    ++it;
                    return UTF8_OK;
                }
                else
                    err = OVERLONG_SEQUENCE;
            }
            else
                err = INVALID_CODE_POINT;
        }

        // Failure branch - restore the original value of the iterator
        it = original_it;
        return err;
    }

    /// Same as the three-argument overload, for callers that only need the
    /// validation result and not the decoded code point.
    template <typename octet_iterator>
    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
        uint32_t ignored = 0;
        return utf8::internal::validate_next(it, end, ignored);
    }

} // namespace internal

    /// The library API - functions intended to be called by the users

    // Byte order mark
    const uint8_t bom[] = {0xef, 0xbb, 0xbf};

    /// Returns an iterator to the first octet in [start, end) at which
    /// validation fails, or `end` when the whole range validates.
    template <typename octet_iterator>
    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
    {
        octet_iterator result = start;
        while (result != end) {
            // validate_next advances result on success and leaves it in place on failure
            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
            if (err_code != internal::UTF8_OK)
                return result;
        }
        return result;
    }

    /// True when every sequence in [start, end) validates (an empty range is valid).
    template <typename octet_iterator>
    inline bool is_valid(octet_iterator start, octet_iterator end)
    {
        return (utf8::find_invalid(start, end) == end);
    }

    /// True when [it, end) begins with the three octets of the byte order mark.
    /// Never reads at or beyond `end`.
    template <typename octet_iterator>
    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
    {
        return (
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
           );
    }

    //Deprecated in release 2.3
    /// True when the three octets starting at `it` equal the byte order mark.
    /// There is no end bound: up to three octets are read unconditionally,
    /// so prefer starts_with_bom.
    template <typename octet_iterator>
    inline bool is_bom (octet_iterator it)
    {
        return (
            (utf8::internal::mask8(*it++)) == bom[0] &&
            (utf8::internal::mask8(*it++)) == bom[1] &&
            (utf8::internal::mask8(*it))   == bom[2]
           );
    }
} // namespace utf8
				
			|||
 | 
				
			|||
#endif // header guard | 
				
			|||
 | 
				
			|||
 | 
				
			|||
@ -1,228 +0,0 @@ | 
				
			|||
// Copyright 2006 Nemanja Trifunovic | 
				
			|||
 | 
				
			|||
/* | 
				
			|||
Permission is hereby granted, free of charge, to any person or organization | 
				
			|||
obtaining a copy of the software and accompanying documentation covered by | 
				
			|||
this license (the "Software") to use, reproduce, display, distribute, | 
				
			|||
execute, and transmit the Software, and to prepare derivative works of the | 
				
			|||
Software, and to permit third-parties to whom the Software is furnished to | 
				
			|||
do so, all subject to the following: | 
				
			|||
 | 
				
			|||
The copyright notices in the Software and this entire statement, including | 
				
			|||
the above license grant, this restriction and the following disclaimer, | 
				
			|||
must be included in all copies of the Software, in whole or in part, and | 
				
			|||
all derivative works of the Software, unless such copies or derivative | 
				
			|||
works are solely in the form of machine-executable object code generated by | 
				
			|||
a source language processor. | 
				
			|||
 | 
				
			|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
				
			|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
				
			|||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | 
				
			|||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | 
				
			|||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | 
				
			|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
				
			|||
DEALINGS IN THE SOFTWARE. | 
				
			|||
*/ | 
				
			|||
 | 
				
			|||
 | 
				
			|||
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 | 
				
			|||
 | 
				
			|||
#include "core.h" | 
				
			|||
 | 
				
			|||
namespace utf8 | 
				
			|||
{ | 
				
			|||
    namespace unchecked  | 
				
			|||
    { | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        octet_iterator append(uint32_t cp, octet_iterator result) | 
				
			|||
        { | 
				
			|||
            if (cp < 0x80)                        // one octet | 
				
			|||
                *(result++) = static_cast<uint8_t>(cp);   | 
				
			|||
            else if (cp < 0x800) {                // two octets | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0); | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80); | 
				
			|||
            } | 
				
			|||
            else if (cp < 0x10000) {              // three octets | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0); | 
				
			|||
                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80); | 
				
			|||
            } | 
				
			|||
            else {                                // four octets | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0); | 
				
			|||
                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80); | 
				
			|||
                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); | 
				
			|||
                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80); | 
				
			|||
            } | 
				
			|||
            return result; | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        uint32_t next(octet_iterator& it) | 
				
			|||
        { | 
				
			|||
            uint32_t cp = utf8::internal::mask8(*it); | 
				
			|||
            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it); | 
				
			|||
            switch (length) { | 
				
			|||
                case 1: | 
				
			|||
                    break; | 
				
			|||
                case 2: | 
				
			|||
                    it++; | 
				
			|||
                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); | 
				
			|||
                    break; | 
				
			|||
                case 3: | 
				
			|||
                    ++it;  | 
				
			|||
                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); | 
				
			|||
                    ++it; | 
				
			|||
                    cp += (*it) & 0x3f; | 
				
			|||
                    break; | 
				
			|||
                case 4: | 
				
			|||
                    ++it; | 
				
			|||
                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);                 | 
				
			|||
                    ++it; | 
				
			|||
                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff; | 
				
			|||
                    ++it; | 
				
			|||
                    cp += (*it) & 0x3f;  | 
				
			|||
                    break; | 
				
			|||
            } | 
				
			|||
            ++it; | 
				
			|||
            return cp;         | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        uint32_t peek_next(octet_iterator it) | 
				
			|||
        { | 
				
			|||
            return utf8::unchecked::next(it);     | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        uint32_t prior(octet_iterator& it) | 
				
			|||
        { | 
				
			|||
            while (utf8::internal::is_trail(*(--it))) ; | 
				
			|||
            octet_iterator temp = it; | 
				
			|||
            return utf8::unchecked::next(temp); | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        inline uint32_t previous(octet_iterator& it) | 
				
			|||
        { | 
				
			|||
            return utf8::unchecked::prior(it); | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator, typename distance_type> | 
				
			|||
        void advance (octet_iterator& it, distance_type n) | 
				
			|||
        { | 
				
			|||
            for (distance_type i = 0; i < n; ++i) | 
				
			|||
                utf8::unchecked::next(it); | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
        typename std::iterator_traits<octet_iterator>::difference_type | 
				
			|||
        distance (octet_iterator first, octet_iterator last) | 
				
			|||
        { | 
				
			|||
            typename std::iterator_traits<octet_iterator>::difference_type dist; | 
				
			|||
            for (dist = 0; first < last; ++dist)  | 
				
			|||
                utf8::unchecked::next(first); | 
				
			|||
            return dist; | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename u16bit_iterator, typename octet_iterator> | 
				
			|||
        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) | 
				
			|||
        {        | 
				
			|||
            while (start != end) { | 
				
			|||
                uint32_t cp = utf8::internal::mask16(*start++); | 
				
			|||
            // Take care of surrogate pairs first | 
				
			|||
                if (utf8::internal::is_lead_surrogate(cp)) { | 
				
			|||
                    uint32_t trail_surrogate = utf8::internal::mask16(*start++); | 
				
			|||
                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; | 
				
			|||
                } | 
				
			|||
                result = utf8::unchecked::append(cp, result); | 
				
			|||
            } | 
				
			|||
            return result;          | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename u16bit_iterator, typename octet_iterator> | 
				
			|||
        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) | 
				
			|||
        { | 
				
			|||
            while (start < end) { | 
				
			|||
                uint32_t cp = utf8::unchecked::next(start); | 
				
			|||
                if (cp > 0xffff) { //make a surrogate pair | 
				
			|||
                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET); | 
				
			|||
                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); | 
				
			|||
                } | 
				
			|||
                else | 
				
			|||
                    *result++ = static_cast<uint16_t>(cp); | 
				
			|||
            } | 
				
			|||
            return result; | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator, typename u32bit_iterator> | 
				
			|||
        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) | 
				
			|||
        { | 
				
			|||
            while (start != end) | 
				
			|||
                result = utf8::unchecked::append(*(start++), result); | 
				
			|||
 | 
				
			|||
            return result; | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        template <typename octet_iterator, typename u32bit_iterator> | 
				
			|||
        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) | 
				
			|||
        { | 
				
			|||
            while (start < end) | 
				
			|||
                (*result++) = utf8::unchecked::next(start); | 
				
			|||
 | 
				
			|||
            return result; | 
				
			|||
        } | 
				
			|||
 | 
				
			|||
        // The iterator class | 
				
			|||
        template <typename octet_iterator> | 
				
			|||
          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {  | 
				
			|||
            octet_iterator it; | 
				
			|||
            public: | 
				
			|||
            iterator () {} | 
				
			|||
            explicit iterator (const octet_iterator& octet_it): it(octet_it) {} | 
				
			|||
            // the default "big three" are OK | 
				
			|||
            octet_iterator base () const { return it; } | 
				
			|||
            uint32_t operator * () const | 
				
			|||
            { | 
				
			|||
                octet_iterator temp = it; | 
				
			|||
                return utf8::unchecked::next(temp); | 
				
			|||
            } | 
				
			|||
            bool operator == (const iterator& rhs) const  | 
				
			|||
            {  | 
				
			|||
                return (it == rhs.it); | 
				
			|||
            } | 
				
			|||
            bool operator != (const iterator& rhs) const | 
				
			|||
            { | 
				
			|||
                return !(operator == (rhs)); | 
				
			|||
            } | 
				
			|||
            iterator& operator ++ ()  | 
				
			|||
            { | 
				
			|||
                ::std::advance(it, utf8::internal::sequence_length(it)); | 
				
			|||
                return *this; | 
				
			|||
            } | 
				
			|||
            iterator operator ++ (int) | 
				
			|||
            { | 
				
			|||
                iterator temp = *this; | 
				
			|||
                ::std::advance(it, utf8::internal::sequence_length(it)); | 
				
			|||
                return temp; | 
				
			|||
            }   | 
				
			|||
            iterator& operator -- () | 
				
			|||
            { | 
				
			|||
                utf8::unchecked::prior(it); | 
				
			|||
                return *this; | 
				
			|||
            } | 
				
			|||
            iterator operator -- (int) | 
				
			|||
            { | 
				
			|||
                iterator temp = *this; | 
				
			|||
                utf8::unchecked::prior(it); | 
				
			|||
                return temp; | 
				
			|||
            } | 
				
			|||
          }; // class iterator | 
				
			|||
 | 
				
			|||
    } // namespace utf8::unchecked | 
				
			|||
} // namespace utf8  | 
				
			|||
 | 
				
			|||
 | 
				
			|||
#endif // header guard | 
				
			|||
 | 
				
			|||
						Write
						Preview
					
					
					Loading…
					
					Cancel
						Save
					
		Reference in new issue