
#include "filereader.h"

#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>

#include <sys/mman.h>

#include "../3rd-party/md5.h"


// Bit values combined in filereader::flags.

// File contents are read as they are, without decompression
// (set by OpenFile when bNoMagic is given or no known suffix matched).
#define FLAG_PLAIN 1
// File name ended in ".gz" and the zlib stream was initialized.
#define FLAG_GZ 2
// File name ended in ".bz2" and the bzip2 stream was initialized.
#define FLAG_BZ2 4
// No more lines will be delivered (end of the plain data was reached, or opening failed).
#define FLAG_EOF 8
// Opening or decompressing failed.
#define FLAG_ERROR 16
// The decompressor reported the end of the compressed stream (also set on decompression errors).
#define FLAG_COMPEOF 32

//#include <fstream>
#include <iostream>

// Size passed to m_UncompBuf.init() when a compressed file is opened
// (presumably the capacity of the decompression buffer in bytes -- confirm against the buffer class).
#define BUFSIZE 20000

using namespace MYSTD;

//! Creates a reader with no file attached: nothing mapped, no descriptor,
//! no flags set. OpenFile() must be called before reading.
filereader::filereader() 
:
	m_szFileBuf((char*)MAP_FAILED), 
	m_fd(-1)
{
	flags=0;
	m_nCurLine=0;
	// The read position and mapping size are inspected by _UncompressMoreData(),
	// so give them defined values even before a file has been opened.
	m_nBufSize=0;
	m_nBufPos=0;
}

//! Returns the name given to OpenFile() without its directory part and,
//! for files opened as gzip or bzip2, without the ".gz" / ".bz2" suffix.
string filereader::GetPureFilename()
{
	// keep only what follows the last slash, if there is one
	const tStrPos nSlash=m_sOrigName.rfind('/');
	string sPure = (stmiss==nSlash) ? m_sOrigName : m_sOrigName.substr(nSlash+1);

	// number of trailing characters that make up the compression suffix
	tStrPos nSuffixLen=0;
	if(flags&FLAG_GZ)
		nSuffixLen=3;
	else if(flags&FLAG_BZ2)
		nSuffixLen=4;

	if(nSuffixLen)
		sPure.erase(sPure.length()-nSuffixLen);

	return sPure;
}

//! Opens sFilename read-only, maps it into memory and prepares reading.
//!
//! Unless bNoMagic is set, the compression type is derived from the file name
//! suffix (".bz2" or ".gz", compared case-insensitively) and the matching
//! decompressor is initialized; any other name is read as plain data.
//! An empty regular file opened as plain data is accepted and simply has no
//! content; an empty compressed file is rejected.
//!
//! @param sFilename path of the file to open
//! @param bNoMagic  when true, skip the suffix detection and read the raw file contents
//! @return true on success; false on failure, in which case FLAG_ERROR and FLAG_EOF are set
bool filereader::OpenFile(const string & sFilename, bool bNoMagic)
{
	// Declared up front because the failure paths jump to the ofail label.
	struct stat statbuf;

	m_sOrigName=sFilename;
	m_fd=open(sFilename.c_str(), O_RDONLY);
	
	if(m_fd<0)
		goto ofail;
	
	if(bNoMagic)
		flags|=FLAG_PLAIN;
	else if(sFilename.length()>4 
			&& 0==strcasecmp(".bz2", sFilename.c_str()+sFilename.length()-4))
	{
#ifdef HAS_BZ2LIB
		m_bzStream.bzalloc = NULL;
		m_bzStream.bzfree = NULL;
		m_bzStream.opaque = NULL;
		if(BZ_OK!=BZ2_bzDecompressInit ( & m_bzStream, 1, EXTREME_MEMORY_SAVING))
			goto ofail;
		
		// ok, now can mark for processing and cleanup
		flags |= FLAG_BZ2;
		m_UncompBuf.init(BUFSIZE);
#else
		goto ofail;
#endif
	}
	else if(sFilename.length()>3 
			&& 0==strcasecmp(".gz", sFilename.c_str()+sFilename.length()-3))
	{
#ifdef HAS_ZLIB
		m_zStream.zalloc=NULL;
		m_zStream.zfree=NULL;
		m_zStream.opaque=NULL;
		// zlib requires next_in and avail_in to be set before inflateInit2 as well
		m_zStream.next_in=NULL;
		m_zStream.avail_in=0;
		
		// 47 = 15 (largest window) + 32 (detect zlib or gzip header automatically)
		if(Z_OK != inflateInit2(&m_zStream, 47))
			goto ofail;
		
		// ok, now can mark for processing and cleanup
		flags |= FLAG_GZ;
		m_UncompBuf.init(BUFSIZE);
#else
		goto ofail;
#endif
	}
	else
		flags |= FLAG_PLAIN;
	
	if(0!=fstat(m_fd, &statbuf))
		goto ofail;
	
	m_nBufSize=statbuf.st_size;
	
	if(statbuf.st_size>0)
	{
		m_szFileBuf = (char*) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, m_fd, 0);
		
		if(m_szFileBuf==MAP_FAILED)
			goto ofail;
		
#ifdef HAS_ADVISE
		// if possible, prepare to read that
		posix_madvise(m_szFileBuf, statbuf.st_size, POSIX_MADV_SEQUENTIAL);
#endif
	}
	else if(0==(flags&FLAG_PLAIN) || !S_ISREG(statbuf.st_mode))
	{
		// A compressed stream can never be empty, and a zero-sized special
		// file cannot be read through a mapping.
		goto ofail;
	}
	// else: empty regular file. mmap() rejects a zero length, so nothing is
	// mapped; m_szFileBuf is left alone and the zero m_nBufSize makes the
	// readers see end of data right away without touching the pointer.
	
	m_nBufPos=0;
	m_nCurLine=0;
	return true;
	
	ofail:
	
	// the descriptor, if any, stays open here and is closed by the destructor
	flags |= (FLAG_ERROR|FLAG_EOF);
	return false;
}

//! Reports whether the reader is free of errors.
//!
//! @param bErrorsConsiderFatal when true and FLAG_ERROR is set, print a
//!        message to stderr and terminate the process with EXIT_FAILURE
//! @return true if FLAG_ERROR is not set, false otherwise
bool filereader::CheckGoodState(bool bErrorsConsiderFatal)
{
	// the common case: nothing has gone wrong
	if(0==(flags&FLAG_ERROR))
		return true;

	if(bErrorsConsiderFatal)
	{
		cerr << "Error opening file: "<< m_sOrigName << ", terminating."<<endl;
		exit(EXIT_FAILURE);
	}

	return false;
}

//! Returns "<name>:<line>", built from the name given to OpenFile() and the
//! current value of the line counter.
string filereader::GetPositionDescription() {
	// room for ':' plus up to 20 digits and the terminating NUL
	char szLineSuffix[22];
	snprintf(szLineSuffix, sizeof(szLineSuffix), ":%u", m_nCurLine);

	string sPosition(m_sOrigName);
	sPosition+=szLineSuffix;
	return sPosition;
}

//! Releases everything OpenFile() acquired: the file mapping, the file
//! descriptor and the state of whichever decompressor was initialized.
filereader::~filereader() {
	
	if(m_szFileBuf!=MAP_FAILED)
		munmap(m_szFileBuf, m_nBufSize);
			
	if(m_fd>=0)
		close(m_fd);
	
#ifdef HAS_BZ2LIB
	// FLAG_BZ2 is only set after BZ2_bzDecompressInit succeeded
	if(flags&FLAG_BZ2)
		BZ2_bzDecompressEnd (&m_bzStream);
#endif

#ifdef HAS_ZLIB
	// The stream was set up with inflateInit2, so it has to be released with
	// inflateEnd; deflateEnd does not free an inflate state.
	if(flags&FLAG_GZ)
		inflateEnd(&m_zStream);
#endif
}

//! Extracts the next line into sOut, without its line terminator.
//!
//! A line ends at the first '\r' or '\n'; when that character is directly
//! followed by the other one of the two ("\r\n" or "\n\r"), both are consumed
//! as a single terminator. m_nCurLine is incremented for every line returned.
//!
//! @param sOut receives the line; it is cleared first, also when false is returned
//! @return true if a line (possibly empty) was stored in sOut; false once
//!         FLAG_EOF or FLAG_ERROR is set, or when a compressed stream has no
//!         buffered data left and has ended or failed
//!
//! NOTE(review): in plain mode the call that first notices the end of the
//! data sets FLAG_EOF but still returns true with an empty sOut; only the
//! following call returns false. Callers may depend on this extra empty line.
//!
// TODO: can split up a line when it doesn't fit into one buffer
// (when no terminator is found after one decompression attempt, whatever is
// buffered is returned as a line)
bool filereader::GetOneLine(string & sOut) {
	
	sOut.clear();
	
	// stop flags set in previous run
	if(flags & (FLAG_EOF|FLAG_ERROR))
		return false;
	
	//cout<< "wtf, m_pos: " << m_pos <<endl;
	
	char *rbuf; // start of the data to scan
	size_t nRest; // number of bytes available at rbuf
	bool bCanRetry=true; // one attempt to decompress more data is allowed per call

	// re-entered once after _UncompressMoreData() to look at the refilled buffer
	maybe_got_more:
	
	if(flags&FLAG_PLAIN)
	{
		// all plain data is already mapped, there is nothing to refill
		bCanRetry=false;
		
		if(m_nBufPos>=m_nBufSize)
			flags|=FLAG_EOF;
		// detect eof and remember that, for now or later calls
		nRest = (flags&FLAG_EOF) ? 0 : m_nBufSize-m_nBufPos;
		rbuf=m_szFileBuf+m_nBufPos;
	}
	else 
	{
		nRest=m_UncompBuf.size();
		
		// nothing buffered and the stream has ended or failed: no more lines
		if(nRest==0 && (flags&(FLAG_COMPEOF|FLAG_ERROR|FLAG_EOF)))
			return false;
		
		rbuf=m_UncompBuf.rptr();
	}
	
	// look for end in the rest of buffer (may even be nullsized then it fails implicitly, newline decides), 
	// on miss -> try to get more, check whether the available size changed, 
	// on success -> retry
	
	char *newline=mempbrk(rbuf, "\r\n", nRest);
	
	// nLineLen: bytes copied to sOut; nDropLen: bytes consumed from the input
	tStrPos nLineLen, nDropLen;
	
	if(newline)
	{
		nLineLen=newline-rbuf;
		nDropLen=nLineLen+1;
		// cut optional \r or \n but only when it's from another kind
		// (the sum of two terminator characters equals '\r'+'\n' only if they differ)
		if(nRest > nDropLen &&  newline[0]+newline[1]== '\r'+'\n')
			nDropLen++;
	}
	else
	{
		if(bCanRetry)
		{
			bCanRetry=false;
			_UncompressMoreData();
			goto maybe_got_more;
		}
		
		// otherwise can continue to the finish 
		// (no terminator in sight: hand out everything that is left)
		nDropLen=nLineLen=nRest;
	}
	
	sOut.assign(rbuf, nLineLen);
	
	// advance past the line and its terminator
	if(flags&FLAG_PLAIN)
		m_nBufPos+=nDropLen;
	else
		m_UncompBuf.drop(nDropLen);
	
	m_nCurLine++;
	return true;
}

//! Feeds the not yet consumed part of the mapped file to the active
//! decompressor and appends the output to m_UncompBuf.
//!
//! Nothing is returned. The amount of new data shows in m_UncompBuf.size();
//! the outcome shows in flags: FLAG_COMPEOF is set once the compressed
//! stream has ended, and FLAG_ERROR (together with FLAG_COMPEOF or FLAG_EOF)
//! when the input is corrupt or ends prematurely.
inline void filereader::_UncompressMoreData() {

	// work with uncompressed buffer/window...
	m_UncompBuf.move(); // get unused space if possible
		
	if(	flags&FLAG_COMPEOF // cannot uncompress more
		|| m_UncompBuf.freecapa()==0 )
		return;
	
	if(m_nBufPos>m_nBufSize )
	{
		// shouldn't be here. Decompressor not finished? Unexpected EOF!
		flags|=(FLAG_ERROR|FLAG_EOF);
		return;
	}
	
	// The decompressors count input in unsigned int. Clamp instead of
	// truncating, so a huge remainder never turns into a bogus length
	// (a multiple of 2^32 would otherwise be fed as "no input").
	unsigned int nFeedLen=(unsigned int)-1;
	if(m_nBufSize-m_nBufPos < nFeedLen)
		nFeedLen=m_nBufSize-m_nBufPos;
	
#ifdef HAS_BZ2LIB
	if(flags&FLAG_BZ2)
	{
		m_bzStream.next_in=m_szFileBuf+m_nBufPos;
		m_bzStream.avail_in=nFeedLen;
		m_bzStream.next_out=m_UncompBuf.wptr();
		m_bzStream.avail_out=m_UncompBuf.freecapa();

		int ret=BZ2_bzDecompress(&m_bzStream);
		if(ret==BZ_STREAM_END)
		{
			// remember this later
			flags|=FLAG_COMPEOF;
			ret=BZ_OK;
		}
		if(ret==BZ_OK)
		{
			const unsigned int nUsedBytes= nFeedLen-m_bzStream.avail_in;
			const unsigned int nGotBytes= m_UncompBuf.freecapa() - m_bzStream.avail_out;
			m_nBufPos += nUsedBytes;
			m_UncompBuf.got(nGotBytes);
			
			// bzip2 answers BZ_OK even when it runs out of input. There was
			// free output space, so "nothing consumed, nothing produced and
			// stream not finished" means the file is truncated. Without this
			// check the callers would spin on empty results forever.
			if(0==nUsedBytes && 0==nGotBytes && 0==(flags&FLAG_COMPEOF))
				flags|=(FLAG_COMPEOF|FLAG_ERROR);
		}
		// or corrupted data?
		else flags|=(FLAG_COMPEOF|FLAG_ERROR);
	}
#endif
#ifdef HAS_ZLIB
	if(flags&FLAG_GZ)
	{
		m_zStream.next_in=(Bytef*) m_szFileBuf+m_nBufPos;
		m_zStream.avail_in=nFeedLen;
		m_zStream.next_out=(Bytef*) m_UncompBuf.wptr();
		m_zStream.avail_out=m_UncompBuf.freecapa();

		// unlike bzip2, zlib reports a call without any progress as
		// Z_BUF_ERROR, which ends up in the error branch below
		int ret=inflate(&m_zStream, Z_NO_FLUSH);
		if(ret==Z_STREAM_END)
		{
			flags|=FLAG_COMPEOF;
			ret=Z_OK;
		}
		if(ret==Z_OK)
		{ //ok, accept the data
			m_nBufPos += (nFeedLen-m_zStream.avail_in);
			unsigned int nGotBytes= m_UncompBuf.freecapa() - m_zStream.avail_out;
			m_UncompBuf.got(nGotBytes);
		}
		// or corrupted data?
		else flags|=(FLAG_COMPEOF|FLAG_ERROR);
	}
#endif
}

//! Computes the MD5 sum of a file's raw contents as lower-case hex text.
//!
//! @param sFileName file to digest
//! @param out       receives 32 hex characters followed by a terminating NUL,
//!                  so it must provide room for at least 33 bytes
//! @return false if the digest could not be computed (out is then untouched)
// TODO: make this use a string
bool filereader::GetMd5String(const MYSTD::string & sFileName, char out[])
{
	uint8_t digest[16];
	if(GetMd5Sum(sFileName, digest))
	{
		// two hex digits per digest byte; every sprintf also writes a NUL,
		// the last of which terminates the result
		char *pOut=out;
		for(UINT i=0; i<16; ++i, pOut+=2)
			sprintf(pOut, "%02x", digest[i]);
		return true;
	}
	return false;
}


//! Computes the binary MD5 digest of a file's raw (on-disk) contents.
//!
//! @param sFileName file to digest; it is opened without suffix detection,
//!                  so compressed files are digested as stored
//! @param out       receives the 16 digest bytes
//! @return false if the file could not be opened or decompression failed
bool filereader::GetMd5Sum(const MYSTD::string & sFileName, uint8_t out[])
{
	md5_state_s ctx;
	md5_init(&ctx);
	filereader f;
	if (!f.OpenFile(sFileName, true))
		return false;
	if (f.flags&FLAG_PLAIN)
	{
		// Feed the mapping in bounded pieces: the byte count of md5_append
		// is assumed to be an int (as in L. Peter Deutsch's md5.h), so
		// passing the size of a file of 2 GiB or more in one call would
		// overflow and yield a wrong digest.
		md5_byte_t *pData=(md5_byte_t*) f.m_szFileBuf;
		size_t nLeft=f.m_nBufSize;
		const size_t nMaxChunk=size_t(1)<<30;
		while(nLeft>0)
		{
			const size_t nChunk = nLeft<nMaxChunk ? nLeft : nMaxChunk;
			md5_append(&ctx, pData, (int) nChunk);
			pData+=nChunk;
			nLeft-=nChunk;
		}
	}
	else
	{
		// Only reachable if the file is ever opened with suffix detection;
		// digests the decompressed stream buffer by buffer.
		while(true)
		{
			f._UncompressMoreData();
			if(f.flags&FLAG_ERROR)
				return false;
			UINT nRest=f.m_UncompBuf.size();
			if(nRest==0)
				break;
			md5_append(&ctx, (md5_byte_t*) f.m_UncompBuf.rptr(), nRest);
			f.m_UncompBuf.clear();
		}
		
	}
	md5_finish(&ctx, out);
	return true;
}

/*
foreach $b (0..255) {
   print "\n" if($b%16==0);
   if( $b>=48 && $b<58 ) { $b-=48;}
   elsif($b>=97 && $b<103) { $b-=87;}
   elsif($b>=65 && $b<71) { $b-=55;}
   else {$b= --$dummy}
   print "$b,";
}
print "\n";
*/

// Marker for characters that are not hex digits; larger than any digit value.
#define _inv ((uint_fast16_t)-1)
// Maps every possible character code to its hex digit value (0..15), or to
// _inv when the character is not one of 0-9, a-f, A-F.
uint_fast16_t hexmap[] = {
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                0,1,2,3,4,5,6,7,8,9,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,10,11,12,13,14,15,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,10,11,12,13,14,15,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv
                };

// Decodes one pair of hex characters into a byte; returns false if either
// character is not a hex digit. Both digits are checked separately because
// hi*16+lo wraps around when only the low digit is invalid: "1g" would
// otherwise come out as the valid-looking value 0x0f.
static inline bool CsHexPairToByte(unsigned char cHigh, unsigned char cLow, uint8_t &nOut)
{
	const uint_fast16_t nHigh=hexmap[cHigh];
	const uint_fast16_t nLow=hexmap[cLow];
	if(nHigh>15 || nLow>15)
		return false;
	nOut=(uint8_t) (nHigh*16 + nLow);
	return true;
}

//! Compares a checksum given as 32 hex characters with a 16 byte binary one.
//!
//! @param a at least 32 characters of hex text (either case)
//! @param b 16 bytes of binary checksum
//! @return true only if a is valid hex and describes exactly the bytes in b
inline bool CsEqual(const unsigned char a[], uint8_t b[])
{
	for(int i=0; i<16;i++)
	{
		uint8_t nByte;
		if(!CsHexPairToByte(a[i*2], a[i*2+1], nByte) || nByte != b[i])
			return false;
	}
	return true;
}

//! Converts a checksum given as 32 hex characters into 16 binary bytes.
//!
//! @param a at least 32 characters of hex text (either case)
//! @param b receives the 16 decoded bytes; on failure the bytes decoded
//!          before the offending pair have already been written
//! @return false if a non-hex character was found
bool CsAsciiToBin(const unsigned char a[], uint8_t b[])
{
	for(int i=0; i<16;i++)
	{
		uint8_t nByte;
		if(!CsHexPairToByte(a[i*2], a[i*2+1], nByte))
			return false;
		b[i]=nByte;
	}
	return true;
}

//! Renders a 16 byte binary checksum as 32 lower-case hex characters.
//!
//! @param sum the 16 checksum bytes
//! @return the hex representation, most significant digit of each byte first
string CsBinToString(const uint8_t sum[])
{
	static const char szDigits[] = "0123456789abcdef";

	string sHex;
	sHex.reserve(32);
	for(const uint8_t *pByte=sum; pByte!=sum+16; ++pByte)
	{
		sHex+=szDigits[*pByte / 16];
		sHex+=szDigits[*pByte % 16];
	}
	return sHex;
}

bool filereader::CheckMd5Sum(const string & sFileName, const string & sReference)
{
	uint8_t sum[16];

	if(sFileName.length()!=16)
		return false;
	
	if(!GetMd5Sum(sFileName, sum))
		return false;
	
	return CsEqual( (unsigned char*) sReference.data(), sum);
}

	/*

#warning bla, weg mit
	return true;
	
	if(flags&FLAG_EOF)
		return false; // cannot get more
	
	// where to read from file?
	acbuf readbuf & = (flags&FLAG_PLAIN) ? m_textbuf : m_compbuf;
	if(m_fd>=0 && readbuf.
	{
		
		
	}
    if(m_fd<0 && m_compbuf.empty())
    {
    	flags|=FLAG_ERROR;
    	return false;
    }
    if( 0==(flags&FLAG_PLAIN))
    dbglvl(DBGSPAM);
    total_read+=nInBytes;
    rem-=nInBytes;
    //cerr << "total_read: " << total_read<<endl;

    if(dtype==GZ) {
        zstrm->next_in=(Bytef*) prebuf->rptr();
        zstrm->avail_in=prebuf->size();
        zstrm->next_out=(Bytef*) buf->wptr();
        zstrm->avail_out=buf->freecapa();
        dbglvl(DBGSPAM);

        int ret=inflate(zstrm, Z_NO_FLUSH);
        if(ret==Z_OK || ret==Z_STREAM_END) { //ok, accept the data
            prebuf->drop(prebuf->size() - zstrm->avail_in);
            nInBytes=buf->freecapa() - zstrm->avail_out; // now only interesting for real data
            buf->got(nInBytes);
            buf->moveData();
        }
        else { // corrupted data, or buffer overfull because of invalid stuff
           erReturn;
        }
        if(ret==Z_STREAM_END)
            eof=true;
        dbglvl(DBGSPAM);
    }
    else if(dtype==BZ) {
        dbglvl(DBGSPAM);
        bzstrm->next_in=prebuf->rptr();
        bzstrm->avail_in=prebuf->size();
        bzstrm->next_out=buf->wptr();
        bzstrm->avail_out=buf->freecapa();

        int ret=BZ2_bzDecompress(bzstrm);
        if(ret==BZ_OK || ret==BZ_STREAM_END) { //ok, take the data
            dbglvl(DBGSPAM);
            prebuf->drop(prebuf->size() - bzstrm->avail_in);
            nInBytes=buf->freecapa() - bzstrm->avail_out;
            buf->got(nInBytes);
            buf->moveData();
        }
        else { // corrupted data, or buffer overfull because of invalid stuff
            dbglvl(DBGSPAM);
            erReturn;
        }
        if(ret==BZ_STREAM_END)
            eof=true;
        dbglvl(DBGSPAM);
    }
    else if(nInBytes==0 && rem != 0) // incomplete file?
        eof=true;

    dbglvl(DBGSPAM);
    if(nInBytes)
        process();

    dbglvl(DBGSPAM);
    buf->moveData();

    if(eof) {
        finish();
        return 0;
    }
    // assume there is data, can be checked in the next run
    return 1;
    */
