-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gtools-0.6.9 (2017-06-26); bug fixes and minor patches
Enhancements * `gegen varname = group(varlist)` no longer has holes, as noted in issue #4 * `gegen` and `gcollapse` fall back on `collapse` and `egen` in case there is a collision. Future releases will implement an internal way to resolve collisions. This is not a huge concern, as SpookyHash has no known vulnerabilities (I believe the concern raied in issue #2 was base on a typo; see [here](rurban/smhasher#34)) and the probability of a collision is very low. * `gegen varname = group(varlist)` now has a consistency test (though the group IDs are not the same as `egen`'s, they should map to the `egen` group IDs 1 to 1, which is what the tests now check for). Bug fixes * Additional fixes for issue #1 * Apparentlly the argument Stata passes to plugins have a maximum length. The code now makes sure chuncks are passed when the PATH length will exceed the maximum. The plugin later concatenates the chuncks to set the PATH correctly.
- Loading branch information
Showing
43 changed files
with
5,983 additions
and
3,905 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
*! version 0.6.8 18Jul2017 Mauricio Caceres Bravo, [email protected] | ||
*! version 0.6.9 26Jul2017 Mauricio Caceres Bravo, [email protected] | ||
*! -collapse- implementation using C for faster processing | ||
|
||
capture program drop gcollapse | ||
|
@@ -72,7 +72,21 @@ program gcollapse | |
local path = substr("`path'"', 1, length(`"`path'"') - 1) | ||
} | ||
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .) | ||
cap plugin call env_set, PATH `"`path';`__gtools_hashpath'"' | ||
local newpath `"`path';`__gtools_hashpath'"' | ||
local truncate 2048 | ||
if ( `:length local newpath' > `truncate' ) { | ||
local loops = ceil(`:length local newpath' / `truncate') | ||
mata: __gtools_pathpieces = J(1, `loops', "") | ||
mata: __gtools_pathcall = "" | ||
mata: for(k = 1; k <= `loops'; k++) __gtools_pathpieces[k] = substr(st_local("newpath"), 1 + (k - 1) * `truncate', `truncate') | ||
mata: for(k = 1; k <= `loops'; k++) __gtools_pathcall = __gtools_pathcall + " `" + `"""' + __gtools_pathpieces[k] + `"""' + "' " | ||
mata: st_local("pathcall", __gtools_pathcall) | ||
mata: mata drop __gtools_pathcall __gtools_pathpieces | ||
cap plugin call env_set, PATH `pathcall' | ||
} | ||
else { | ||
cap plugin call env_set, PATH `"`path';`__gtools_hashpath'"' | ||
} | ||
if ( _rc ) { | ||
di as err "Unable to add '`__gtools_hashpath'' to system PATH." | ||
exit _rc | ||
|
@@ -86,6 +100,16 @@ program gcollapse | |
* Parsing syntax options * | ||
*********************************************************************** | ||
|
||
local website_url https://github.com/mcaceresb/stata-gtools/issues | ||
local website_disp github.com/mcaceresb/stata-gtools | ||
|
||
if ( "`oncollision'" == "" ) local oncollision fallback | ||
if ( !inlist("`oncollision'", "fallback", "error") ) { | ||
di as err "option -oncollision()- must be 'fallback' or 'error'" | ||
exit 198 | ||
} | ||
di "debug-`oncollision'" | ||
|
||
if ( ("`merge'" != "") & ("`if'" != "") ) { | ||
di as err "combining -merge- with -if- is currently buggy; a fix is planned v0.5.1" | ||
exit 198 | ||
|
@@ -161,6 +185,10 @@ program gcollapse | |
* on single-threaded otherwise. | ||
parse_vars `anything' `if' `in', by(`by') `cw' smart(`smart') v(`verbose') `multi' `debug_force_hash' | ||
local indexed = `r(indexed)' | ||
if ( `=_N' == 0 ) { | ||
di as err "no observations" | ||
exit 2000 | ||
} | ||
if ( `indexed' ) { | ||
tempvar bysmart | ||
by `by': gen long `bysmart' = (_n == 1) | ||
|
@@ -378,7 +406,13 @@ program gcollapse | |
* variabes in memory (it will be faster to pick up the execution | ||
* from there than re-hash and re-sort). | ||
cap `noi' `plugin_call' `plugvars', collapse index `"`__gtools_file'"' | ||
if ( _rc != 0 ) exit _rc | ||
if ( _rc == 42000 ) { | ||
di as err "There may be 128-bit hash collisions!" | ||
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"' | ||
if ( "`oncollision'" == "fallback" ) collision_handler `0' | ||
else exit 42000 | ||
} | ||
else if ( _rc != 0 ) exit _rc | ||
|
||
* If we collapsed to disk, no need to collapse to memory | ||
local used_io = `=scalar(__gtools_used_io)' | ||
|
@@ -449,6 +483,13 @@ program gcollapse | |
qui ds * | ||
if ( `verbose' ) di as text "In memory: `r(varlist)'" | ||
cap `noi' `plugin_call' `plugvars', collapse ixwrite `"`__gtools_file'"' | ||
if ( _rc == 42000 ) { | ||
di as err "There may be 128-bit hash collisions!" | ||
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"' | ||
if ( "`oncollision'" == "fallback" ) collision_handler `0' | ||
else exit 42000 | ||
} | ||
else if ( _rc != 0 ) exit _rc | ||
gtools_timer info 97 `"Collapsed data to disk (forced by user)"', prints(`benchmark') | ||
} | ||
else { | ||
|
@@ -478,7 +519,13 @@ program gcollapse | |
* Run the full plugin: | ||
if ( `=_N > 0' ) { | ||
cap `noi' `plugin_call' `plugvars', collapse | ||
if ( _rc != 0 ) exit _rc | ||
if ( _rc == 42000 ) { | ||
di as err "There may be 128-bit hash collisions!" | ||
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"' | ||
if ( "`oncollision'" == "fallback" ) collision_handler `0' | ||
else exit 42000 | ||
} | ||
else if ( _rc != 0 ) exit _rc | ||
} | ||
|
||
* End timer for plugin time; benchmark just the program exit | ||
|
@@ -497,8 +544,6 @@ program gcollapse | |
if ( `=scalar(__gtools_J) > 0' ) keep in 1 / `:di scalar(__gtools_J)' | ||
else if ( `=scalar(__gtools_J) == 0' ) drop if 1 | ||
else if ( `=scalar(__gtools_J) < 0' ) { | ||
local website_url https://github.com/mcaceresb/stata-gtools/issues | ||
local website_disp github.com/mcaceresb/stata-gtools | ||
di as err "The plugin returned a negative number of groups." | ||
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"' | ||
} | ||
|
@@ -1280,8 +1325,8 @@ if ( "`c(os)'" == "Windows" ) { | |
if ( _rc ) { | ||
local url https://raw.githubusercontent.com/mcaceresb/stata-gtools | ||
local url `url'/master/spookyhash.dll | ||
di as err `"'`hashlib'' not found."' | ||
di as err "Download {browse "`url'":here} or run {opt gtools, dependencies}"' | ||
di as err `"gtools: `hashlib'' not found."' | ||
di as err `"gtools: download {browse "`url'":here} or run {opt gtools, dependencies}"' | ||
exit _rc | ||
} | ||
mata: __gtools_hashpath = "" | ||
|
@@ -1295,12 +1340,30 @@ if ( "`c(os)'" == "Windows" ) { | |
local path = substr("`path'"', 1, length(`"`path'"') - 1) | ||
} | ||
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .) | ||
cap plugin call env_set, PATH `"`path';`__gtools_hashpath'"' | ||
local newpath `"`path';`__gtools_hashpath'"' | ||
local truncate 2048 | ||
if ( `:length local newpath' > `truncate' ) { | ||
local loops = ceil(`:length local newpath' / `truncate') | ||
mata: __gtools_pathpieces = J(1, `loops', "") | ||
mata: __gtools_pathcall = "" | ||
mata: for(k = 1; k <= `loops'; k++) __gtools_pathpieces[k] = substr(st_local("newpath"), 1 + (k - 1) * `truncate', `truncate') | ||
mata: for(k = 1; k <= `loops'; k++) __gtools_pathcall = __gtools_pathcall + " `" + `"""' + __gtools_pathpieces[k] + `"""' + "' " | ||
mata: st_local("pathcall", __gtools_pathcall) | ||
mata: mata drop __gtools_pathcall __gtools_pathpieces | ||
cap plugin call env_set, PATH `pathcall' | ||
} | ||
else { | ||
cap plugin call env_set, PATH `"`path';`__gtools_hashpath'"' | ||
} | ||
if ( _rc ) { | ||
cap confirm file spookyhash.dll | ||
if ( _rc ) { | ||
di as err "Unable to add '`__gtools_hashpath'' to system PATH." | ||
exit _rc | ||
cap plugin call env_set, PATH `"`__gtools_hashpath'"' | ||
if ( _rc ) { | ||
di as err `"gtools: Unable to add '`__gtools_hashpath'' to system PATH."' | ||
di as err `"gtools: download {browse "`url'":here} or run {opt gtools, dependencies}"' | ||
exit _rc | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -1312,6 +1375,17 @@ program gtools_plugin, plugin using(`"gtools_`:di lower("`c(os)'")'.plugin"') | |
cap program drop gtoolsmulti_plugin | ||
cap program gtoolsmulti_plugin, plugin using(`"gtools_`:di lower("`c(os)'")'_multi.plugin"') | ||
|
||
*********************************************************************** | ||
* Fallback to collapse * | ||
*********************************************************************** | ||
|
||
capture program drop collision_handler | ||
program collision_handler | ||
syntax [anything(equalok)] [if] [in] , [by(passthru) cw fast *] | ||
di as txt "Falling back on -collapse-" | ||
collapse `anything' `if' `in', `by' `cw' `fast' | ||
end | ||
|
||
*********************************************************************** | ||
* Parsing is adapted from Sergio Correia's fcollapse.ado * | ||
*********************************************************************** | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.