http://dspace.wul.waseda.ac.jp/dspace/bitstream/2065/28672/4/Honbun-4624_01.pdf
<tb@panthema.net>
#include "../tools/contest.h"
extern int pss_num_threads;
#define PTHREAD
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#ifdef _WIN32
#ifndef PTHREAD
#include <windows.h>
#endif
#include <io.h>
#else
#include <unistd.h>
#endif
#include <sys/types.h>
#include <fcntl.h>
namespace shamsundar_lcp_merge_string_sort {
int error_message_count=0;
#define ERR0(status,errnum,fmt) \
{fprintf(stderr,"Program %s, File %s, Line %d: ", \
argv[0],__FILE__,__LINE__); \
fprintf(stderr,fmt); \
if(errnum)fprintf(stderr,": Err.Num. %d\n",errnum); \
if(status)exit(status); \
error_message_count++; \
}
#define ERR1(status,errnum,fmt,p1) \
{fprintf(stderr,"Program %s, File %s, Line %d: ", \
argv[0],__FILE__,__LINE__); \
fprintf(stderr,fmt,p1); \
if(errnum)fprintf(stderr,": Err.Num. %d\n",errnum); \
if(status)exit(status); \
error_message_count++; \
}
#define FT '\0'
#define MAXCPU 65
int NCPU = 1;
#define MIN(x,y) (x) < (y) ? (x) : (y)
typedef unsigned int UINT;
typedef struct _AS {
UINT llcp; char* str;
} AS, *pAS, *aAS;
pAS *gpTMP,*gSP;
extern void merge( pAS a[], int la, pAS B[], int lb, pAS C[], int lcol, int *ncomp );
int DEBUG=0;
void
#ifdef _WIN32
_inline
#else
inline
#endif
Merge( aAS a[], int m, aAS b[], int n, aAS c[], int lcol, int *ncomp )
{
int i,j,k;
if(DEBUG){
printf("List A\n"); for(i=0; i<m; i++)printf("%3d %2d %s\n",i,a[i]->llcp,a[i]->str);
printf("\nList B\n"); for(i=0; i<n; i++)printf("%3d %2d %s\n",i,b[i]->llcp,b[i]->str);
}
if(c+m-b){fprintf(stderr,"Improper overlap\n"); exit(1);}
i = 0; j = 0; k = 0;
while (i < m && j < n){
if(a[i]->llcp - b[j]->llcp){
if(a[i]->llcp > b[j]->llcp)c[k++]=a[i++]; else c[k++]=b[j++];
}
else{
char *x=a[i]->str+a[i]->llcp,
*y=b[j]->str+a[i]->llcp,
*xe=a[i]->str+lcol-1;
(*ncomp)++;
while ((*x==*y) && (x < xe) && (*x-FT)) {
x++; y++;
}
if (*x > *y) {
a[i]->llcp=x-a[i]->str;
c[k++]=b[j++];
}
else {
b[j]->llcp=y-b[j]->str;
c[k++]=a[i++];
}
}
}
while ( i < m ) c[k++] = a[i++];
if(DEBUG){
printf("\nList C\n"); for(i=0; i<m+n; i++)printf("%3d %2d %s\n",i,c[i]->llcp,c[i]->str);
printf("\n");
}
}
void merge_sort( pAS pX[], int n, int cnts[], pAS *lPTMP, int fcol, int lcol )
{
UINT i;
if (n>2) {
UINT l=n/2;
char *sX,*sY,*p,*q,*XE;
if(l > 1)merge_sort(pX+0, l-0,cnts, lPTMP, fcol, lcol);
if(n-l > 1)merge_sort(pX+l, n-l,cnts, lPTMP, fcol, lcol);
sX=pX[l-1]->str; sY=pX[l]->str; cnts[0]++;
XE=pX[l-1]->str+lcol;
p=sX+fcol-1; q=sY+fcol-1;
while(*p-FT && *p==*q && p < XE){p++; q++;}
if(*p==FT || *p <= *q){
pX[l]->llcp=p-sX;
return;
}
else{
if(lPTMP-pX){
memcpy(lPTMP, pX, l*sizeof(pAS));
cnts[2]+=l;
}
cnts[1]++;
if(DEBUG){
printf("List A\n"); for(i=0; i<l; i++)printf("%3d %2d %s\n",i,pX[i]->llcp,pX[i]->str);
printf("\nList B\n"); for(i=l; i<n; i++)printf("%3d %2d %s\n",i,pX[i]->llcp,pX[i]->str);
}
Merge(lPTMP+0, l, pX+l, n-l, pX, lcol, cnts);
if(DEBUG){
printf("\nList C\n"); for(i=0; i<n; i++)printf("%3d %2d %s\n",i,pX[i]->llcp,pX[i]->str);
printf("\n");
}
}
return;
}
else{
char *sX,*sY,*p,*q,*XE;
if(n==1)return; cnts[3]++;
sX=pX[0]->str; sY=pX[1]->str;
XE=sX+lcol-1;
cnts[0]++;
p=sX+fcol-1; q=sY+fcol-1;
while(*p-FT && *p==*q){
if(p==XE)break;
p++; q++;
}
if(*p > *q){
pX[0]->str=sY; pX[1]->str=sX;
}
if(p-sX)pX[1]->llcp=p-sX;
}
}
typedef struct {
int cnts[4];
pAS *pX;
int n;
pAS *lPTMP;
int fcol; int lcol;
} msortargs;
msortargs margs[MAXCPU];
#ifdef PTHREAD
void *worker( void *warg){
#else
DWORD WINAPI worker( LPVOID warg){
#endif
msortargs *pStrct=(msortargs *)warg;
pAS *pX=pStrct->pX; int n=pStrct->n; pAS *lpTMP=pStrct->lPTMP;
int *lcnts=pStrct->cnts; int fcol=pStrct->fcol; int lcol=pStrct->lcol;
merge_sort(pX,n,lcnts,lpTMP,fcol,lcol);
#ifdef PTHREAD
return NULL;
#else
return 0;
#endif
}
void merge_sortMT( pAS pX[], int n, int cnts[], pAS *lPTMP, int fcol, int lcol )
{
int i,blksiz,blen;
#ifdef PTHREAD
pthread_t thrd[MAXCPU];
#else
DWORD dwThreadIdArray[MAXCPU]; HANDLE hThreadArray[MAXCPU];
#endif
blksiz=n/(2.0*NCPU)+0.5; blksiz+=blksiz;
if(blksiz%2){
fprintf(stderr,"Blksiz is not even with n=%d\n",n);
exit(1);
}
for(i=0; i<NCPU-1; i++){
margs[i].pX=pX+i*blksiz; margs[i].n=blksiz;
margs[i].cnts[0]=margs[i].cnts[1]=margs[i].cnts[2]=margs[i].cnts[3]=0;
margs[i].lPTMP=lPTMP+i*blksiz/2; margs[i].fcol=fcol; margs[i].lcol=lcol;
}
for(i=NCPU-2; i>=0; i--){
#ifdef PTHREAD
if(pthread_create(thrd+i,NULL,worker,(void *)(margs+i))){
fprintf(stderr,"Error creating thread %d\n",i);
exit(1);
}
#else
hThreadArray[i] =
CreateThread(
NULL,
0,
worker,
&margs[i],
0,
&dwThreadIdArray[i]);
#endif
if(DEBUG)
printf("Worker %d, %ld to %ld\n",i,margs[i].pX-gSP,margs[i].pX-gSP+blksiz-1);
}
cnts[0]=cnts[1]=cnts[2]=cnts[3]=0; blen=MIN(n-(NCPU-1)*blksiz,blksiz);
if(DEBUG)printf("Boss 0, %ld to %ld\n",pX+(NCPU-1)*blksiz-gSP,pX-gSP+(NCPU-1)*blksiz+blen-1);
merge_sort(pX+(NCPU-1)*blksiz,blen,cnts,lPTMP+(NCPU-1)*blksiz/2,fcol,lcol);
for(i=NCPU-2; i>=0; i--){
#ifdef PTHREAD
if(pthread_join(thrd[i],NULL)){
fprintf(stderr,"Error joining thread %d\n",i);
exit(1);
}
#else
WaitForSingleObject(hThreadArray[i],INFINITE);
CloseHandle(hThreadArray[i]);
#endif
cnts[0]+=margs[i].cnts[0]; cnts[1]+=margs[i].cnts[1];
cnts[2]+=margs[i].cnts[2]; cnts[3]+=margs[i].cnts[3];
if(DEBUG)printf("Merge %ld to %ld\n",margs[i].pX-gSP,margs[i].pX-gSP+blksiz-1);
cnts[0]++;
if(strcmp(pX[(i+1)*blksiz-1]->str,pX[(i+1)*blksiz]->str) > 0){
memcpy(lPTMP+i*blksiz/2,margs[i].pX,blksiz*sizeof(pAS)); cnts[2]+=blksiz;
Merge(lPTMP+i*blksiz/2, blksiz, pX+(i+1)*blksiz, blen, pX+i*blksiz, lcol, cnts);
cnts[1]++;
}
else cnts[3]++;
blen+=blksiz;
}
return;
}
#if 0
char *bint2str(long l, char *s){
char *p;
int m,t,u;
u=l % 1000; l/=1000; t=l % 1000; l/=1000; m=l % 1000;
sprintf(s,"%03d,%03d,%03d",m,t,u);
p=s; while(*p=='0' || *p==',')*p++=' ';
if(*p=='\0')*(--p)='0';
return s;
}
#define SIZ 0x4000000
#define NLIN 0x400000
char buf[SIZ];
AS slin[NLIN], *sP[NLIN];
int brks[NLIN];
int main(int argc,char *argv[]){
int fd,blen,nlin=0,i; char *p=buf; FILE *fil;
int c1,c2,c3,c4,cnts[4]; char tstrs[5][13]; int fcol=17,lcol=24;
c1=clock(); gSP=sP;
if(argc-3)ERR0(1,0,"usage: aflgsrt <ifil> <ofil>")
fd=open(argv[1],O_RDONLY);
if(fd < 0)ERR1(1,0,"Could not open %s for reading",argv[1])
blen=read(fd,buf,SIZ);
close(fd);
if(blen==SIZ)ERR0(1,0,"buffer not big enough");
while(p-buf < blen){
sP[nlin]=slin+nlin;
slin[nlin].llcp=fcol-1; slin[nlin++].str=p; p=strchr(p,'\n');
if(p[-1]=='\r')p[-1]='\0'; *(p++)='\0';
if(nlin==NLIN)ERR0(1,0,"NLIN not big enough")
}
gpTMP=(pAS *)calloc((nlin+11)/2,sizeof(pAS));
cnts[0]=cnts[1]=cnts[2]=cnts[3]=0;
c2=clock();
merge_sortMT(sP,nlin,cnts,gpTMP,fcol,lcol);
c3=clock();
free(gpTMP);
if(cnts[1]){
fil=fopen(argv[2],"wb");
if(fil==NULL)ERR1(1,0,"Could not open %s for writing",argv[2])
for(i=0; i<nlin; i++){
fputs(sP[i]->str,fil); fputc('\n',fil);
}
fclose(fil);
c4=clock();
}
else{
ERR0(0,0,"Input file is already sorted\n");
c4=c3;
}
ERR1(0,0,"Read input time : %6.3f\n",(double)(c2-c1)/CLOCKS_PER_SEC);
ERR1(0,0,"MergeSort time : %6.3f\n",(double)(c3-c2)/CLOCKS_PER_SEC);
if(cnts[1])ERR1(0,0,"Write output time : %6.3f\n",(double)(c4-c3)/CLOCKS_PER_SEC);
ERR1(0,0,"Total task time : %6.3f\n",(double)(c4-c1)/CLOCKS_PER_SEC);
printf("\n%s records, %s merges, %s skipped, %s comparisons, %s copies\n",
bint2str(nlin,tstrs[0]),
bint2str(cnts[1],tstrs[1]),
bint2str(cnts[3],tstrs[2]),
bint2str(cnts[0],tstrs[3]),
bint2str(cnts[2],tstrs[4]));
return 0;
}
#endif
AS *sP;
static void prepare(unsigned char **strings, size_t size)
{
sP = new AS[size];
for (size_t i = 0; i < size; ++i) {
sP[i].str = (char*)strings[i];
sP[i].llcp = 0;
}
gSP = new AS*[size];
for (size_t i = 0; i < size; ++i)
gSP[i] = &sP[i];
}
static void string_sort(unsigned char **strings, size_t size)
{
int cnts[4];
int fcol=1,lcol=1000000000;
NCPU = pss_num_threads;
cnts[0]=cnts[1]=cnts[2]=cnts[3]=0;
gpTMP = (pAS *)calloc((size+11)/2, sizeof(pAS));
merge_sortMT(gSP,size,cnts,gpTMP,fcol,lcol);
for (size_t i = 0; i < size; ++i) {
strings[i] = (unsigned char*)gSP[i]->str;
}
free(gpTMP);
delete [] sP;
delete [] gSP;
}
CONTESTANT_REGISTER_PARALLEL_PREPARE(
prepare, string_sort,
"shamsundar/lcp-merge-string-sort",
"Parallelized LCP Merge sort by N. Shamsundar");
}