#include "stdafx.h"

#include <Exdisp.h>

#include "HTMLLinkCollector.h"

class CURLPair
{
public:
	CURLPair() throw()
		: count_(0)
	{
	}

	CURLPair(CAtlStringW v_url)
	{
		url_ = v_url;
		count_ = 1;
	}

	CURLPair(const CURLPair& v_src)
	{
		url_ = v_src.url_;
		count_ = v_src.count_;
	}

	CURLPair& operator=(const CURLPair& v_src)
	{
		if (this != &v_src) {
			url_ = v_src.url_;
			count_ = v_src.count_;
		}
		return *this;
	}

	~CURLPair()
	{
	}

	const CAtlStringW GetURL() const
	{
		return url_;
	}

	int GetCount() const
	{
		return count_;
	}

	void SetCount(int v_count)
	{
		count_ = v_count;
	}

private:

	CAtlStringW url_;

	int count_;
};

/**
 * COM object implementing IHTMLLinkCollector.
 *
 * DoCollect() walks an HTML document and the documents of the IWebBrowser2
 * objects embedded in it, reduces every anchor href and image src to its
 * base URL via GetURLBase(), and counts how often each base URL occurs.
 * The result is exposed through GetCount()/GetURL(), ordered by descending
 * occurrence count.
 */
class __declspec(uuid("{A7D366DB-CE8A-47c8-9930-E4F7CFCF775A}")) CHTMLLinkCollector
	: public IHTMLLinkCollector
	, public CComObjectRoot
	, public CComCoClass<CHTMLLinkCollector, &__uuidof(CHTMLLinkCollector)>
{
public:
	DECLARE_OBJECT_DESCRIPTION("MkImgPage CHTMLLinkCollector Object")

	BEGIN_COM_MAP(CHTMLLinkCollector)
		COM_INTERFACE_ENTRY(IHTMLLinkCollector)
	END_COM_MAP( )

	DECLARE_CLASSFACTORY()
	DECLARE_NO_REGISTRY()

	DECLARE_PROTECT_FINAL_CONSTRUCT()

	/** Starts the object with an empty URL list. */
	HRESULT FinalConstruct() throw()
	{
		return Reset();
	}

	/// IHTMLLinkCollector ///

	/**
	 * Reports the number of URLs in the collected list.
	 * @param v_pCount receives the number of entries
	 * @return S_OK, or E_POINTER if v_pCount is null
	 */
	virtual HRESULT __stdcall GetCount(ULONG* v_pCount) throw()
	{
		if ( !v_pCount) {
			return E_POINTER;
		}

		*v_pCount = static_cast<ULONG>(urls_.GetCount());

		return S_OK;
	}

	/**
	 * Returns the URL stored at the given position of the collected list.
	 * @param v_index zero-based position; must be below the value reported by GetCount()
	 * @param v_pURL receives a newly allocated BSTR owned by the caller; NULL on failure
	 * @return S_OK, E_POINTER if v_pURL is null, E_INVALIDARG if v_index is out of range,
	 *         or the HRESULT carried by an ATL exception (E_FAIL for any other exception)
	 */
	virtual HRESULT __stdcall GetURL(ULONG v_index, BSTR* v_pURL) throw()
	{
		if ( !v_pURL) {
			return E_POINTER;
		}
		// Never leave the out-parameter indeterminate on a failure path.
		*v_pURL = NULL;

		try {
			// v_index is unsigned, so only the upper bound needs checking.
			if (v_index >= urls_.GetCount()) {
				return E_INVALIDARG;
			}

			CComBSTR tmp(urls_.GetAt(v_index).GetURL());

			*v_pURL = tmp.Detach();

			return S_OK;
		}
		catch (const CAtlException& exception) {
			return exception.m_hr;
		}
		catch (...) {
			return E_FAIL;
		}
	}

	/**
	 * Discards every collected URL.
	 * @return S_OK, or E_FAIL if an exception was raised
	 */
	virtual HRESULT __stdcall Reset(void) throw()
	{
		try {
			urls_.RemoveAll();
			urlMap_.RemoveAll();
		}
		catch (...) {
			return E_FAIL;
		}
		return S_OK;
	}

	/**
	 * Collects the link and image base URLs of a document and of the
	 * documents hosted by its embedded browser objects.
	 *
	 * The counts of entries already in the list are kept and increased, so
	 * results accumulate over several calls until Reset() is called.
	 * If collection fails with an error HRESULT, the existing list is left
	 * untouched.
	 *
	 * @param v_pDoc the document to scan
	 * @return S_OK on success, E_FAIL if v_pDoc is null, otherwise the failing
	 *         HRESULT (E_FAIL for a non-ATL exception)
	 */
	virtual HRESULT __stdcall DoCollect(IHTMLDocument2* v_pDoc) throw()
	{
		if ( !v_pDoc) {
			return E_FAIL;
		}

		try {
			// Seed the working map with the current list so that the counts
			// gathered by earlier calls keep accumulating.
			urlMap_.RemoveAll();
			const size_t mx = urls_.GetCount();
			for (size_t idx = 0; idx < mx; idx++) {
				const CURLPair& url = urls_.GetAt(idx);
				urlMap_.SetAt(url.GetURL(), url);
			}

			// Scan the document tree. Only urlMap_ is modified by this step,
			// so urls_ is deliberately not cleared yet: a failure must not
			// throw away the list collected so far.
			const HRESULT hr = DoCollect0(v_pDoc);
			if (FAILED(hr)) {
				urlMap_.RemoveAll();
				return hr;
			}

			// Rebuild the list from the map.
			urls_.RemoveAll();
			POSITION pos = urlMap_.GetStartPosition();
			while (pos) {
				const CAtlMap<CAtlStringW, CURLPair>::CPair* pair
					= urlMap_.GetNext(pos);
				urls_.Add(pair->m_value);
			}
			urlMap_.RemoveAll();

			// Order the list by descending occurrence count.
			Sort();
		}
		catch (const CAtlException& e) {
			return e.m_hr;
		}
		catch (...) {
			return E_FAIL;
		}

		return S_OK;
	}

protected:

	/**
	 * Recursive worker of DoCollect(): parses v_pDoc, then descends into the
	 * document of every embedded object that exposes IWebBrowser2.
	 * Results are accumulated in urlMap_.
	 * @param v_pDoc the document to scan; must not be null
	 * @return S_OK on success, E_NOINTERFACE if the document does not expose
	 *         IOleContainer, otherwise the first failing HRESULT
	 */
	HRESULT DoCollect0(IHTMLDocument2* v_pDoc)
	{
		ATLASSERT(v_pDoc);

		// Links of this document itself.
		HRESULT hr = ParseDocument(v_pDoc);
		if (FAILED(hr)) {
			return hr;
		}

		// Child frames: enumerate the objects embedded in this document.
		CComQIPtr<IOleContainer> pItems(v_pDoc);
		if ( !pItems) {
			// The query can fail; report it instead of dereferencing null.
			return E_NOINTERFACE;
		}

		CComPtr<IEnumUnknown> pItemEnum;
		hr = pItems->EnumObjects(OLECONTF_EMBEDDINGS, &pItemEnum);
		if (FAILED(hr)) {
			return hr;
		}

		for (;;) {
			CComPtr<IUnknown> pItem;
			hr = pItemEnum->Next(1, &pItem, NULL);
			if (FAILED(hr)) {
				return hr;
			}
			if (hr != S_OK) {
				// Enumeration finished.
				break;
			}

			// Only embedded objects that are browser frames are of interest;
			// anything else is ignored.
			CComQIPtr<IWebBrowser2> pFrame(pItem);
			if ( !pFrame) {
				continue;
			}

			CComPtr<IDispatch> pDocDisp;
			if (FAILED(pFrame->get_Document(&pDocDisp))) {
				continue;
			}

			// Only frames whose content is available as an HTML document are
			// handled. A frame showing non-HTML content (or no document at
			// all) is skipped rather than failing the whole collection.
			CComQIPtr<IHTMLDocument2> pDoc(pDocDisp);
			if ( !pDoc) {
				continue;
			}

			// Recurse into the frame's document.
			hr = DoCollect0(pDoc);
			if (FAILED(hr)) {
				return hr;
			}
		}

		return S_OK;
	}

	/**
	 * Visits every element of the document and records the URLs of anchors
	 * and images.
	 * @param v_pDoc the document to scan
	 * @return S_OK on success, E_POINTER if v_pDoc is null, otherwise the
	 *         first failing HRESULT
	 */
	HRESULT ParseDocument(IHTMLDocument2* v_pDoc)
	{
		if ( !v_pDoc) {
			ATLASSERT(FALSE);
			return E_POINTER;
		}

		// Fetch the collection of all elements.
		CComPtr<IHTMLElementCollection> pAll;
		HRESULT hr = v_pDoc->get_all(&pAll);
		if (FAILED(hr)) {
			return hr;
		}

		long length;
		hr = pAll->get_length(&length);
		if (FAILED(hr)) {
			return hr;
		}

		// Walk over all elements.
		for (long idx = 0; idx < length; idx++) {
			CComVariant varIdx(idx);
			CComVariant empty;

			CComPtr<IDispatch> pItemDisp;
			hr = pAll->item(varIdx, empty, &pItemDisp);
			if (FAILED(hr)) {
				return hr;
			}
			if ( !pItemDisp) {
				// item() may succeed without yielding an element; there is
				// nothing to inspect in that case.
				continue;
			}

			// Try the element as an anchor first.
			hr = AnchorElement(pItemDisp);
			if (FAILED(hr)) {
				return hr;
			}
			if (hr == S_OK) {
				continue;
			}

			// Not an anchor (or an anchor without a link): try it as an image.
			hr = ImageElement(pItemDisp);
			if (FAILED(hr)) {
				return hr;
			}
		}

		return S_OK;
	}

	/**
	 * Records the base URL of an image element's src.
	 * @param v_pItemDisp the element to inspect
	 * @return S_OK if the element was handled, S_FALSE if it is not an image
	 *         or has no src, otherwise an error HRESULT
	 */
	HRESULT ImageElement(IDispatch* v_pItemDisp)
	{
		ATLASSERT(v_pItemDisp);

		CComQIPtr<IHTMLImgElement> pImg(v_pItemDisp);
		if ( !pImg) {
			// Not an image.
			return S_FALSE;
		}

		CComBSTR url;
		const HRESULT hr = pImg->get_src(&url);
		if (FAILED(hr)) {
			return hr;
		}

		return AddElementURL(url);
	}

	/**
	 * Records the base URL of an anchor element's href.
	 * @param v_pItemDisp the element to inspect
	 * @return S_OK if the element was handled, S_FALSE if it is not an anchor
	 *         or has no href, otherwise an error HRESULT
	 */
	HRESULT AnchorElement(IDispatch* v_pItemDisp)
	{
		ATLASSERT(v_pItemDisp);

		CComQIPtr<IHTMLAnchorElement> pAnchor(v_pItemDisp);
		if ( !pAnchor) {
			// Not an anchor.
			return S_FALSE;
		}

		CComBSTR url;
		const HRESULT hr = pAnchor->get_href(&url);
		if (FAILED(hr)) {
			return hr;
		}

		return AddElementURL(url);
	}

	/**
	 * Shared tail of ImageElement() and AnchorElement(): reduces an element
	 * URL to its base URL and counts it.
	 * @param v_url the URL read from the element; may be null or empty
	 * @return S_FALSE if v_url is null or empty, S_OK after the base URL has
	 *         been counted, otherwise the failure code of GetURLBase()
	 */
	HRESULT AddElementURL(const CComBSTR& v_url)
	{
		if ( !v_url || v_url.Length() == 0) {
			// No link.
			return S_FALSE;
		}

		CComBSTR urlBase;
		const HRESULT hr = GetURLBase(v_url, &urlBase);
		if (FAILED(hr) || !urlBase) {
			// When GetURLBase() succeeds without producing a string, its
			// success code is passed on unchanged and nothing is counted.
			return hr;
		}

		AddURL(urlBase);

		return S_OK;
	}

	/**
	 * Counts one occurrence of a URL in urlMap_: a new URL is inserted with
	 * a count of one, a known URL has its count increased by one.
	 * @param v_urlBase the URL to count; a null pointer is ignored
	 */
	void AddURL(LPCWSTR v_urlBase)
	{
		if ( !v_urlBase) {
			return;
		}

		CAtlStringW urlBase(v_urlBase);

		CAtlMap<CAtlStringW, CURLPair>::CPair* pos = urlMap_.Lookup(urlBase);
		if ( !pos) {
			CURLPair url(urlBase);
			urlMap_.SetAt(urlBase, url);
		}
		else {
			pos->m_value.SetCount(pos->m_value.GetCount() + 1);
		}
	}

	/**
	 * Sorts urls_ in place by descending occurrence count.
	 */
	void Sort()
	{
		// Plain selection sort: for each position, find the entry with the
		// highest count in the remainder of the list and swap it in.
		const size_t mx = urls_.GetCount();
		for (size_t i = 0; i < mx; i++) {
			CURLPair& ref = urls_.GetAt(i);
			CURLPair* pos = &ref;
			for (size_t j = i + 1; j < mx; j++) {
				CURLPair& a = urls_.GetAt(j);
				if (a.GetCount() > pos->GetCount()) {
					pos = &a;
				}
			}
			if (pos != &ref) {
				// swap
				CURLPair tmp(ref);
				ref = *pos;
				*pos = tmp;
			}
		}
	}

	/**
	 * Working map from URL to its entry; filled and emptied again within
	 * DoCollect().
	 */
	CAtlMap<CAtlStringW, CURLPair> urlMap_;

	/**
	 * The collected URL entries. Rebuilt and sorted by descending occurrence
	 * count at the end of every successful DoCollect().
	 */
	CAtlArray<CURLPair> urls_;

};

// Registers CHTMLLinkCollector with the ATL object map; going by the macro
// name this is a non-creatable entry — TODO confirm against the ATL docs.
// This file hands out instances through CreateHTMLLinkCollector().
OBJECT_ENTRY_NON_CREATEABLE_EX_AUTO(__uuidof(CHTMLLinkCollector), CHTMLLinkCollector);


/**
 * Creates a new CHTMLLinkCollector and returns its IHTMLLinkCollector
 * interface.
 * @param v_ppCollector receives the new object; initialised to NULL before
 *        creation is attempted
 * @return E_POINTER if v_ppCollector is null, otherwise the HRESULT of
 *         CHTMLLinkCollector::CreateInstance
 */
HRESULT __stdcall CreateHTMLLinkCollector(IHTMLLinkCollector** v_ppCollector) throw()
{
	// Validate the out-parameter here rather than relying on the callee.
	if ( !v_ppCollector) {
		return E_POINTER;
	}
	*v_ppCollector = NULL;

	return CHTMLLinkCollector::CreateInstance(v_ppCollector);
}

/**
 * Returns a URL with its trailing file name removed.
 *
 * The part of the URL before the first '?' is scanned backwards for the last
 * folder separator ('/' or '\'), and everything up to and including that
 * separator is returned. The URL is returned unchanged (query string
 * included) when
 *  - it contains no folder separator,
 *  - the text after the last separator contains no '.', i.e. it has no
 *    extension and is taken to be a folder already, or
 *  - the last separator belongs to the "//" that introduces the host, as in
 *    "http://example.com"; the dot is then part of the host name, not of a
 *    file name.
 *
 * @param v_pURL the URL to examine; a null pointer is treated as ""
 * @param v_pURLBase receives the base URL as a newly allocated BSTR owned by
 *        the caller; NULL on failure
 * @return S_OK on success, E_POINTER if v_pURLBase is null, or the HRESULT of
 *         an ATL exception raised while allocating the result (E_FAIL for
 *         any other exception)
 */
HRESULT __stdcall GetURLBase(LPCWSTR v_pURL, BSTR* v_pURLBase) throw()
{
	if ( !v_pURLBase) {
		return E_POINTER;
	}
	*v_pURLBase = NULL;

	if ( !v_pURL) {
		v_pURL = L"";
	}

	// Advance to the start of the query string, or to the terminator.
	LPCWSTR p = v_pURL;
	while (*p && *p != L'?') {
		p = CharNextW(p);
	}

	// Walk backwards to the last folder separator, noting whether a '.' is
	// passed on the way. On a hit, p is left just behind the separator.
	bool findExtension = false;
	for (;;) {
		if (*p == L'.') {
			findExtension = true;
		}
		if (*p == L'/' || *p == L'\\') {
			p = CharNextW(p);
			break;
		}
		if (p == v_pURL) {
			break;
		}
		p = CharPrevW(v_pURL, p);
	}

	int len = static_cast<int>(p - v_pURL);

	// A cut directly behind "//" (at the very start or following ':') would
	// strip the host name and leave only the scheme prefix.
	const bool cutAtAuthority =
		len >= 2 && p[-1] == L'/' && p[-2] == L'/'
		&& (len == 2 || p[-3] == L':');

	if (len == 0 || !findExtension || cutAtAuthority) {
		// Nothing to strip: copy the URL as it is.
		len = lstrlenW(v_pURL);
	}

	// This function is declared throw(), but the CComBSTR constructor may
	// raise an ATL exception when allocation fails; translate it to an
	// HRESULT the same way the collector methods do.
	try {
		CComBSTR URLBase(len, v_pURL);
		*v_pURLBase = URLBase.Detach();
	}
	catch (const CAtlException& e) {
		return e.m_hr;
	}
	catch (...) {
		return E_FAIL;
	}

	return S_OK;
}
